i386: Remove Xeon Phi ISA support
gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_intel_cpu): Remove Xeon Phi cpus. (get_available_features): Remove Xeon Phi ISAs. * common/config/i386/i386-common.cc (OPTION_MASK_ISA_AVX512PF_SET): Removed. (OPTION_MASK_ISA_AVX512ER_SET): Ditto. (OPTION_MASK_ISA2_AVX5124FMAPS_SET): Ditto. (OPTION_MASK_ISA2_AVX5124VNNIW_SET): Ditto. (OPTION_MASK_ISA_PREFETCHWT1_SET): Ditto. (OPTION_MASK_ISA_AVX512F_UNSET): Remove AVX512PF and AVX512ER. (OPTION_MASK_ISA_AVX512PF_UNSET): Removed. (OPTION_MASK_ISA_AVX512ER_UNSET): Ditto. (OPTION_MASK_ISA2_AVX5124FMAPS_UNSET): Ditto. (OPTION_MASK_ISA2_AVX5124VNNIW_UNSET): Ditto. (OPTION_MASK_ISA_PREFETCHWT1_UNSET): Ditto. (OPTION_MASK_ISA2_AVX512F_UNSET): Remove AVX5124FMAPS and AVX5124VNNIW. (ix86_handle_option): Remove Xeon Phi options. (processor_names): Remove Xeon Phi cpus. (processor_alias_table): Ditto. * common/config/i386/i386-cpuinfo.h (enum processor_types): Ditto. (enum processor_features): Remove Xeon Phi ISAs. * common/config/i386/i386-isas.h: Ditto. * config.gcc: Remove Xeon Phi cpus and ISAs. * config/i386/avx5124fmapsintrin.h: Remove intrin support. * config/i386/avx5124vnniwintrin.h: Ditto. * config/i386/avx512erintrin.h: Ditto. * config/i386/avx512pfintrin.h: Ditto. * config/i386/cpuid.h (bit_AVX512PF): Removed. (bit_AVX512ER): Ditto. (bit_PREFETCHWT1): Ditto. (bit_AVX5124VNNIW): Ditto. (bit_AVX5124FMAPS): Ditto. * config/i386/driver-i386.cc (host_detect_local_cpu): Remove Xeon Phi. * config/i386/i386-builtin-types.def: Remove unused types. * config/i386/i386-builtin.def (BDESC): Remove builtins. * config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins): Ditto. * config/i386/i386-c.cc (ix86_target_macros_internal): Remove Xeon Phi cpus and ISAs. * config/i386/i386-expand.cc (ix86_expand_builtin): Remove Xeon Phi related handlers. (ix86_emit_swdivsf): Ditto. (ix86_emit_swsqrtsf): Ditto. * config/i386/i386-isa.def: Remove Xeon Phi ISAs. * config/i386/i386-options.cc (m_KNL): Removed. (m_KNM): Ditto. 
(isa2_opts): Remove Xeon Phi ISAs. (isa_opts): Ditto. (processor_cost_table): Remove Xeon Phi cpus. (ix86_valid_target_attribute_inner_p): Remove Xeon Phi ISAs. (ix86_option_override_internal): Remove Xeon Phi related handlers. * config/i386/i386-rust.cc (ix86_rust_target_cpu_info): Remove Xeon Phi ISAs. * config/i386/i386.cc (ix86_hard_regno_mode_ok): Remove Xeon Phi related handler. * config/i386/i386.h (TARGET_EMIT_VZEROUPPER): Removed. (enum processor_type): Remove Xeon Phi cpus. * config/i386/i386.md (prefetch): Remove PREFETCHWT1. (*prefetch_3dnow): Ditto. (*prefetch_prefetchwt1): Removed. * config/i386/i386.opt: Remove Xeon Phi ISAs. * config/i386/immintrin.h: Ditto. * config/i386/sse.md (VF1_AVX512ER_128_256): Removed. (rsqrt<mode>2): Change iterator from VF1_AVX512ER_128_256 to VF1_128_256. (GATHER_SCATTER_SF_MEM_MODE): Removed. (avx512pf_gatherpf<mode>sf): Ditto. (*avx512pf_gatherpf<VI48_512:mode>sf_mask): Ditto. (avx512pf_gatherpf<mode>df): Ditto. (*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask): Ditto. (avx512pf_scatterpf<mode>sf): Ditto. (*avx512pf_scatterpf<VI48_512:mode>sf_mask): Ditto. (avx512pf_scatterpf<mode>df): Ditto. (*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask): Ditto. (exp2<mode>2): Ditto. (avx512er_exp2<mode><mask_name><round_saeonly_name>): Ditto. (<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>): Ditto. (avx512er_vmrcp28<mode><mask_name><round_saeonly_name>): Ditto. (<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>): Ditto. (avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>): Ditto. (IMOD4): Ditto. (imod4_narrow): Ditto. (mov<mode>): Ditto. (*mov<mode>_internal): Ditto. (avx5124fmaddps_4fmaddps): Ditto. (avx5124fmaddps_4fmaddps_mask): Ditto. (avx5124fmaddps_4fmaddps_maskz): Ditto. (avx5124fmaddps_4fmaddss): Ditto. (avx5124fmaddps_4fmaddss_mask): Ditto. (avx5124fmaddps_4fmaddss_maskz): Ditto. (avx5124fmaddps_4fnmaddps): Ditto. (avx5124fmaddps_4fnmaddps_mask): Ditto. 
(avx5124fmaddps_4fnmaddps_maskz): Ditto. (avx5124fmaddps_4fnmaddss): Ditto. (avx5124fmaddps_4fnmaddss_mask): Ditto. (avx5124fmaddps_4fnmaddss_maskz): Ditto. (avx5124vnniw_vp4dpwssd): Ditto. (avx5124vnniw_vp4dpwssd_mask): Ditto. (avx5124vnniw_vp4dpwssd_maskz): Ditto. (avx5124vnniw_vp4dpwssds): Ditto. (avx5124vnniw_vp4dpwssds_mask): Ditto. (avx5124vnniw_vp4dpwssds_maskz): Ditto. * config/i386/x86-tune-sched.cc (ix86_issue_rate): Remove Xeon Phi cpus. (ix86_adjust_cost): Ditto. * config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Ditto. (X86_TUNE_PARTIAL_REG_DEPENDENCY): Ditto. (X86_TUNE_MOVX): Ditto. (X86_TUNE_MEMORY_MISMATCH_STALL): Ditto. (X86_TUNE_ACCUMULATE_OUTGOING_ARGS): Ditto. (X86_TUNE_FOUR_JUMP_LIMIT): Ditto. (X86_TUNE_USE_INCDEC): Ditto. (X86_TUNE_INTEGER_DFMODE_MOVES): Ditto. (X86_TUNE_OPT_AGU): Ditto. (X86_TUNE_AVOID_LEA_FOR_ADDR): Ditto. (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE): Ditto. (X86_TUNE_USE_SAHF): Ditto. (X86_TUNE_USE_CLTD): Ditto. (X86_TUNE_USE_BT): Ditto. (X86_TUNE_ONE_IF_CONV_INSN): Ditto. (X86_TUNE_EXPAND_ABS): Ditto. (X86_TUNE_USE_SIMODE_FIOP): Ditto. (X86_TUNE_EXT_80387_CONSTANTS): Ditto. (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Ditto. (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Ditto. (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS): Ditto. (X86_TUNE_SLOW_PSHUFB): Ditto. (X86_TUNE_EMIT_VZEROUPPER): Removed. * config/i386/xmmintrin.h (enum _mm_hint): Remove _MM_HINT_ET1. * doc/extend.texi: Remove Xeon Phi. * doc/invoke.texi: Ditto. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Remove Xeon Phi ISAs. * g++.dg/other/i386-3.C: Ditto. * g++.target/i386/mv28.C: Ditto. * gcc.target/i386/builtin_target.c: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/sse-26.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fmaddps-1.c: Removed. * gcc.target/i386/avx5124fmadd-v4fmaddps-2.c: Ditto. 
* gcc.target/i386/avx5124fmadd-v4fmaddss-1.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c: Ditto. * gcc.target/i386/avx512er-check.h: Ditto. * gcc.target/i386/avx512er-vexp2pd-1.c: Ditto. * gcc.target/i386/avx512er-vexp2pd-2.c: Ditto. * gcc.target/i386/avx512er-vexp2ps-1.c: Ditto. * gcc.target/i386/avx512er-vexp2ps-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28pd-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28pd-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-3.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-4.c: Ditto. * gcc.target/i386/avx512er-vrcp28sd-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28sd-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28ss-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28ss-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28pd-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28pd-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-3.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-4.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-5.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-6.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28sd-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28sd-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ss-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0dps-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0qps-1.c: Ditto. 
* gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1dps-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1qps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0dps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0qps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1dps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1qps-1.c: Ditto. * gcc.target/i386/pr104448.c: Ditto. * gcc.target/i386/pr82941-2.c: Ditto. * gcc.target/i386/pr82942-2.c: Ditto. * gcc.target/i386/pr82990-1.c: Ditto. * gcc.target/i386/pr82990-3.c: Ditto. * gcc.target/i386/pr82990-6.c: Ditto. * gcc.target/i386/pr82990-7.c: Ditto. * gcc.target/i386/pr89523-5.c: Ditto. * gcc.target/i386/pr89523-6.c: Ditto. * gcc.target/i386/pr91033.c: Ditto. * gcc.target/i386/prefetchwt1-1.c: Ditto.
This commit is contained in:
parent
88b3f83238
commit
e1a7e2c54d
104 changed files with 97 additions and 4445 deletions
|
@ -390,18 +390,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
|||
CHECK___builtin_cpu_is ("tremont");
|
||||
cpu_model->__cpu_type = INTEL_TREMONT;
|
||||
break;
|
||||
case 0x57:
|
||||
/* Knights Landing. */
|
||||
cpu = "knl";
|
||||
CHECK___builtin_cpu_is ("knl");
|
||||
cpu_model->__cpu_type = INTEL_KNL;
|
||||
break;
|
||||
case 0x85:
|
||||
/* Knights Mill. */
|
||||
cpu = "knm";
|
||||
CHECK___builtin_cpu_is ("knm");
|
||||
cpu_model->__cpu_type = INTEL_KNM;
|
||||
break;
|
||||
case 0x1a:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
|
@ -844,8 +832,6 @@ get_available_features (struct __processor_model *cpu_model,
|
|||
set_feature (FEATURE_CLFLUSHOPT);
|
||||
if (ebx & bit_CLWB)
|
||||
set_feature (FEATURE_CLWB);
|
||||
if (ecx & bit_PREFETCHWT1)
|
||||
set_feature (FEATURE_PREFETCHWT1);
|
||||
/* NB: bit_OSPKE indicates that OS supports PKU. */
|
||||
if (ecx & bit_OSPKE)
|
||||
set_feature (FEATURE_PKU);
|
||||
|
@ -898,10 +884,6 @@ get_available_features (struct __processor_model *cpu_model,
|
|||
set_feature (FEATURE_AVX512DQ);
|
||||
if (ebx & bit_AVX512CD)
|
||||
set_feature (FEATURE_AVX512CD);
|
||||
if (ebx & bit_AVX512PF)
|
||||
set_feature (FEATURE_AVX512PF);
|
||||
if (ebx & bit_AVX512ER)
|
||||
set_feature (FEATURE_AVX512ER);
|
||||
if (ebx & bit_AVX512IFMA)
|
||||
set_feature (FEATURE_AVX512IFMA);
|
||||
if (ecx & bit_AVX512VBMI)
|
||||
|
@ -914,10 +896,6 @@ get_available_features (struct __processor_model *cpu_model,
|
|||
set_feature (FEATURE_AVX512BITALG);
|
||||
if (ecx & bit_AVX512VPOPCNTDQ)
|
||||
set_feature (FEATURE_AVX512VPOPCNTDQ);
|
||||
if (edx & bit_AVX5124VNNIW)
|
||||
set_feature (FEATURE_AVX5124VNNIW);
|
||||
if (edx & bit_AVX5124FMAPS)
|
||||
set_feature (FEATURE_AVX5124FMAPS);
|
||||
if (edx & bit_AVX512VP2INTERSECT)
|
||||
set_feature (FEATURE_AVX512VP2INTERSECT);
|
||||
if (edx & bit_AVX512FP16)
|
||||
|
|
|
@ -64,10 +64,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX2_SET)
|
||||
#define OPTION_MASK_ISA_AVX512CD_SET \
|
||||
(OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512F_SET)
|
||||
#define OPTION_MASK_ISA_AVX512PF_SET \
|
||||
(OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512F_SET)
|
||||
#define OPTION_MASK_ISA_AVX512ER_SET \
|
||||
(OPTION_MASK_ISA_AVX512ER | OPTION_MASK_ISA_AVX512F_SET)
|
||||
#define OPTION_MASK_ISA_AVX512DQ_SET \
|
||||
(OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512F_SET)
|
||||
#define OPTION_MASK_ISA_AVX512BW_SET \
|
||||
|
@ -79,8 +75,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define OPTION_MASK_ISA2_AVXIFMA_SET OPTION_MASK_ISA2_AVXIFMA
|
||||
#define OPTION_MASK_ISA_AVX512VBMI_SET \
|
||||
(OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET)
|
||||
#define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS
|
||||
#define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW
|
||||
#define OPTION_MASK_ISA_AVX512VBMI2_SET \
|
||||
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW_SET)
|
||||
#define OPTION_MASK_ISA_AVX512FP16_SET OPTION_MASK_ISA_AVX512BW_SET
|
||||
|
@ -97,7 +91,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
|
||||
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
|
||||
#define OPTION_MASK_ISA_ADX_SET OPTION_MASK_ISA_ADX
|
||||
#define OPTION_MASK_ISA_PREFETCHWT1_SET OPTION_MASK_ISA_PREFETCHWT1
|
||||
#define OPTION_MASK_ISA_CLFLUSHOPT_SET OPTION_MASK_ISA_CLFLUSHOPT
|
||||
#define OPTION_MASK_ISA_XSAVES_SET \
|
||||
(OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_XSAVE_SET)
|
||||
|
@ -242,14 +235,11 @@ along with GCC; see the file COPYING3. If not see
|
|||
| OPTION_MASK_ISA2_AVX10_1_256_UNSET)
|
||||
#define OPTION_MASK_ISA_AVX512F_UNSET \
|
||||
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
|
||||
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
|
||||
| OPTION_MASK_ISA_AVX512DQ_UNSET | OPTION_MASK_ISA_AVX512BW_UNSET \
|
||||
| OPTION_MASK_ISA_AVX512VL_UNSET | OPTION_MASK_ISA_AVX512IFMA_UNSET \
|
||||
| OPTION_MASK_ISA_AVX512VNNI_UNSET \
|
||||
| OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET)
|
||||
#define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD
|
||||
#define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF
|
||||
#define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER
|
||||
#define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ
|
||||
#define OPTION_MASK_ISA_AVX512BW_UNSET \
|
||||
(OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET \
|
||||
|
@ -258,8 +248,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA
|
||||
#define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA
|
||||
#define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI
|
||||
#define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS
|
||||
#define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW
|
||||
#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
|
||||
#define OPTION_MASK_ISA_AVX512FP16_UNSET OPTION_MASK_ISA_AVX512BW_UNSET
|
||||
#define OPTION_MASK_ISA2_AVX512FP16_UNSET OPTION_MASK_ISA2_AVX512FP16
|
||||
|
@ -272,7 +260,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
|
||||
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
|
||||
#define OPTION_MASK_ISA_ADX_UNSET OPTION_MASK_ISA_ADX
|
||||
#define OPTION_MASK_ISA_PREFETCHWT1_UNSET OPTION_MASK_ISA_PREFETCHWT1
|
||||
#define OPTION_MASK_ISA_CLFLUSHOPT_UNSET OPTION_MASK_ISA_CLFLUSHOPT
|
||||
#define OPTION_MASK_ISA_XSAVEC_UNSET OPTION_MASK_ISA_XSAVEC
|
||||
#define OPTION_MASK_ISA_XSAVES_UNSET OPTION_MASK_ISA_XSAVES
|
||||
|
@ -364,8 +351,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
|
||||
#define OPTION_MASK_ISA2_AVX512F_UNSET \
|
||||
(OPTION_MASK_ISA2_AVX512BW_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
|
||||
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
|
||||
OPTION_MASK_ISA2_SSE_UNSET
|
||||
|
@ -641,32 +626,6 @@ ix86_handle_option (struct gcc_options *opts,
|
|||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx512pf:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512PF_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512PF_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512PF_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx512er:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512ER_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512ER_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512ER_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mrdpid:
|
||||
if (value)
|
||||
{
|
||||
|
@ -872,36 +831,6 @@ ix86_handle_option (struct gcc_options *opts,
|
|||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx5124fmaps:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX5124FMAPS_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX5124FMAPS_SET;
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX5124FMAPS_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX5124FMAPS_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx5124vnniw:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX5124VNNIW_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX5124VNNIW_SET;
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX5124VNNIW_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX5124VNNIW_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavx512vbmi2:
|
||||
if (value)
|
||||
{
|
||||
|
@ -1858,19 +1787,6 @@ ix86_handle_option (struct gcc_options *opts,
|
|||
}
|
||||
return true;
|
||||
|
||||
case OPT_mprefetchwt1:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PREFETCHWT1_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PREFETCHWT1_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PREFETCHWT1_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mclflushopt:
|
||||
if (value)
|
||||
{
|
||||
|
@ -2131,8 +2047,6 @@ const char *const processor_names[] =
|
|||
"sierraforest",
|
||||
"grandridge",
|
||||
"clearwaterforest",
|
||||
"knl",
|
||||
"knm",
|
||||
"skylake",
|
||||
"skylake-avx512",
|
||||
"cannonlake",
|
||||
|
@ -2307,10 +2221,6 @@ const pta processor_alias_table[] =
|
|||
M_CPU_TYPE (INTEL_GRANDRIDGE), P_PROC_AVX2},
|
||||
{"clearwaterforest", PROCESSOR_CLEARWATERFOREST, CPU_HASWELL,
|
||||
PTA_CLEARWATERFOREST, M_CPU_TYPE (INTEL_CLEARWATERFOREST), P_PROC_AVX2},
|
||||
{"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL,
|
||||
M_CPU_TYPE (INTEL_KNL), P_PROC_AVX512F},
|
||||
{"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM,
|
||||
M_CPU_TYPE (INTEL_KNM), P_PROC_AVX512F},
|
||||
{"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM,
|
||||
M_VENDOR (VENDOR_INTEL), P_NONE},
|
||||
{"geode", PROCESSOR_GEODE, CPU_GEODE,
|
||||
|
|
|
@ -50,12 +50,10 @@ enum processor_types
|
|||
AMDFAM10H,
|
||||
AMDFAM15H,
|
||||
INTEL_SILVERMONT,
|
||||
INTEL_KNL,
|
||||
AMD_BTVER1,
|
||||
AMD_BTVER1 = 8,
|
||||
AMD_BTVER2,
|
||||
AMDFAM17H,
|
||||
INTEL_KNM,
|
||||
INTEL_GOLDMONT,
|
||||
INTEL_GOLDMONT = 12,
|
||||
INTEL_GOLDMONT_PLUS,
|
||||
INTEL_TREMONT,
|
||||
AMDFAM19H,
|
||||
|
@ -179,13 +177,9 @@ enum processor_features
|
|||
FEATURE_AVX512BW,
|
||||
FEATURE_AVX512DQ,
|
||||
FEATURE_AVX512CD,
|
||||
FEATURE_AVX512ER,
|
||||
FEATURE_AVX512PF,
|
||||
FEATURE_AVX512VBMI,
|
||||
FEATURE_AVX512VBMI = 26,
|
||||
FEATURE_AVX512IFMA,
|
||||
FEATURE_AVX5124VNNIW,
|
||||
FEATURE_AVX5124FMAPS,
|
||||
FEATURE_AVX512VPOPCNTDQ,
|
||||
FEATURE_AVX512VPOPCNTDQ = 30,
|
||||
FEATURE_AVX512VBMI2,
|
||||
FEATURE_GFNI,
|
||||
FEATURE_VPCLMULQDQ,
|
||||
|
@ -220,8 +214,7 @@ enum processor_features
|
|||
FEATURE_OSXSAVE,
|
||||
FEATURE_PCONFIG,
|
||||
FEATURE_PKU,
|
||||
FEATURE_PREFETCHWT1,
|
||||
FEATURE_PRFCHW,
|
||||
FEATURE_PRFCHW = 66,
|
||||
FEATURE_PTWRITE,
|
||||
FEATURE_RDPID,
|
||||
FEATURE_RDRND,
|
||||
|
|
|
@ -76,18 +76,10 @@ ISA_NAMES_TABLE_START
|
|||
"-mavx512dq")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512cd", FEATURE_AVX512CD, P_NONE,
|
||||
"-mavx512cd")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512er", FEATURE_AVX512ER, P_NONE,
|
||||
"-mavx512er")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512pf", FEATURE_AVX512PF, P_NONE,
|
||||
"-mavx512pf")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512vbmi", FEATURE_AVX512VBMI, P_NONE,
|
||||
"-mavx512vbmi")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512ifma", FEATURE_AVX512IFMA, P_NONE,
|
||||
"-mavx512ifma")
|
||||
ISA_NAMES_TABLE_ENTRY("avx5124vnniw", FEATURE_AVX5124VNNIW, P_NONE,
|
||||
"-mavx5124vnniw")
|
||||
ISA_NAMES_TABLE_ENTRY("avx5124fmaps", FEATURE_AVX5124FMAPS, P_NONE,
|
||||
"-mavx5124fmaps")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512vpopcntdq", FEATURE_AVX512VPOPCNTDQ,
|
||||
P_NONE, "-mavx512vpopcntdq")
|
||||
ISA_NAMES_TABLE_ENTRY("avx512vbmi2", FEATURE_AVX512VBMI2, P_NONE,
|
||||
|
@ -135,8 +127,6 @@ ISA_NAMES_TABLE_START
|
|||
ISA_NAMES_TABLE_ENTRY("osxsave", FEATURE_OSXSAVE, P_NONE, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("pconfig", FEATURE_PCONFIG, P_NONE, "-mpconfig")
|
||||
ISA_NAMES_TABLE_ENTRY("pku", FEATURE_PKU, P_NONE, "-mpku")
|
||||
ISA_NAMES_TABLE_ENTRY("prefetchwt1", FEATURE_PREFETCHWT1, P_NONE,
|
||||
"-mprefetchwt1")
|
||||
ISA_NAMES_TABLE_ENTRY("prfchw", FEATURE_PRFCHW, P_NONE, "-mprfchw")
|
||||
ISA_NAMES_TABLE_ENTRY("ptwrite", FEATURE_PTWRITE, P_NONE, "-mptwrite")
|
||||
ISA_NAMES_TABLE_ENTRY("rdpid", FEATURE_RDPID, P_NONE, "-mrdpid")
|
||||
|
|
|
@ -428,12 +428,11 @@ i[34567]86-*-* | x86_64-*-*)
|
|||
avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h
|
||||
rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h
|
||||
adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
|
||||
avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
|
||||
shaintrin.h clflushoptintrin.h xsavecintrin.h
|
||||
xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
|
||||
avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h
|
||||
avx512ifmaintrin.h avx512ifmavlintrin.h avx512vbmiintrin.h
|
||||
avx512vbmivlintrin.h avx5124fmapsintrin.h avx5124vnniwintrin.h
|
||||
avx512cdintrin.h shaintrin.h clflushoptintrin.h
|
||||
xsavecintrin.h xsavesintrin.h avx512dqintrin.h
|
||||
avx512bwintrin.h avx512vlintrin.h avx512vlbwintrin.h
|
||||
avx512vldqintrin.h avx512ifmaintrin.h avx512ifmavlintrin.h
|
||||
avx512vbmiintrin.h avx512vbmivlintrin.h
|
||||
avx512vpopcntdqintrin.h clwbintrin.h mwaitxintrin.h
|
||||
clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h
|
||||
gfniintrin.h cet.h avx512vbmi2intrin.h
|
||||
|
@ -709,7 +708,7 @@ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
|
|||
bdver3 bdver4 znver1 znver2 znver3 znver4 znver5 btver1 btver2 k8 k8-sse3 \
|
||||
opteron opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 \
|
||||
atom slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
|
||||
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
|
||||
silvermont skylake-avx512 cannonlake icelake-client icelake-server \
|
||||
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
|
||||
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
|
||||
nano-x2 eden-x4 nano-x4 lujiazui yongfeng x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
|
||||
|
|
|
@ -21,196 +21,4 @@
|
|||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
#define _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124FMAPS__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124fmaps,evex512")
|
||||
#define __DISABLE_AVX5124FMAPS__
|
||||
#endif /* __AVX5124FMAPS__ */
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fnmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fnmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fnmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124FMAPS__
|
||||
#undef __DISABLE_AVX5124FMAPS__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124FMAPS__ */
|
||||
|
||||
#endif /* _AVX5124FMAPSINTRIN_H_INCLUDED */
|
||||
#error "AVX5124FMAPS support has been removed since GCC 15."
|
||||
|
|
|
@ -21,112 +21,4 @@
|
|||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
#define _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124VNNIW__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124vnniw,evex512")
|
||||
#define __DISABLE_AVX5124VNNIW__
|
||||
#endif /* __AVX5124VNNIW__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssd_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssd_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssds_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssds_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124VNNIW__
|
||||
#undef __DISABLE_AVX5124VNNIW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124VNNIW__ */
|
||||
|
||||
#endif /* _AVX5124VNNIWINTRIN_H_INCLUDED */
|
||||
#error "AVX5124VNNIW support has been removed since GCC 15."
|
||||
|
|
|
@ -21,516 +21,4 @@
|
|||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512ERINTRIN_H_INCLUDED
|
||||
#define _AVX512ERINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512ER__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512er,evex512")
|
||||
#define __DISABLE_AVX512ER__
|
||||
#endif /* __AVX512ER__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef double __v8df __attribute__ ((__vector_size__ (64)));
|
||||
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_undefined_pd (),
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_undefined_ps (),
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_undefined_pd (),
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_undefined_ps (),
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_undefined_pd (),
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_undefined_ps (),
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
#else
|
||||
/* Macro fallbacks for the packed *_round_* intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands directly to the matching
   builtin; the unmasked forms pass a zero vector and a mask of -1.  */
#define _mm512_exp2a23_round_pd(A, C) \
  __builtin_ia32_exp2pd_mask ((A), (__v8df) _mm512_setzero_pd (), -1, (C))

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
  __builtin_ia32_exp2pd_mask ((A), (W), (U), (C))

#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
  __builtin_ia32_exp2pd_mask ((A), (__v8df) _mm512_setzero_pd (), (U), (C))

#define _mm512_exp2a23_round_ps(A, C) \
  __builtin_ia32_exp2ps_mask ((A), (__v16sf) _mm512_setzero_ps (), -1, (C))

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
  __builtin_ia32_exp2ps_mask ((A), (W), (U), (C))

#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
  __builtin_ia32_exp2ps_mask ((A), (__v16sf) _mm512_setzero_ps (), (U), (C))

#define _mm512_rcp28_round_pd(A, C) \
  __builtin_ia32_rcp28pd_mask ((A), (__v8df) _mm512_setzero_pd (), -1, (C))

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
  __builtin_ia32_rcp28pd_mask ((A), (W), (U), (C))

#define _mm512_maskz_rcp28_round_pd(U, A, C) \
  __builtin_ia32_rcp28pd_mask ((A), (__v8df) _mm512_setzero_pd (), (U), (C))

#define _mm512_rcp28_round_ps(A, C) \
  __builtin_ia32_rcp28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), -1, (C))

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
  __builtin_ia32_rcp28ps_mask ((A), (W), (U), (C))

#define _mm512_maskz_rcp28_round_ps(U, A, C) \
  __builtin_ia32_rcp28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), (U), (C))

#define _mm512_rsqrt28_round_pd(A, C) \
  __builtin_ia32_rsqrt28pd_mask ((A), (__v8df) _mm512_setzero_pd (), -1, (C))

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
  __builtin_ia32_rsqrt28pd_mask ((A), (W), (U), (C))

#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
  __builtin_ia32_rsqrt28pd_mask ((A), (__v8df) _mm512_setzero_pd (), (U), (C))

#define _mm512_rsqrt28_round_ps(A, C) \
  __builtin_ia32_rsqrt28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), -1, (C))

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
  __builtin_ia32_rsqrt28ps_mask ((A), (W), (U), (C))

#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
  __builtin_ia32_rsqrt28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), (U), (C))
|
||||
|
||||
/* Macro fallbacks for the scalar *_round_s[ds] intrinsics, used when
   __OPTIMIZE__ is not defined.

   The builtins take B as their first operand and A as their second: that is
   the order used by the __OPTIMIZE__ inline definitions of these same
   intrinsics and by the _mm_rcp28_sd/_mm_rsqrt28_sd style macros in this
   header.  These fallbacks previously passed (A, B), so non-optimized builds
   swapped the two operands; they now pass (B), (A).  Arguments are
   parenthesized throughout.  */
#define _mm_rcp28_round_sd(A, B, R) \
  __builtin_ia32_rcp28sd_round ((B), (A), (R))

#define _mm_mask_rcp28_round_sd(W, U, A, B, R) \
  __builtin_ia32_rcp28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R) \
  __builtin_ia32_rcp28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
				     (U), (R))

#define _mm_rcp28_round_ss(A, B, R) \
  __builtin_ia32_rcp28ss_round ((B), (A), (R))

#define _mm_mask_rcp28_round_ss(W, U, A, B, R) \
  __builtin_ia32_rcp28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R) \
  __builtin_ia32_rcp28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
				     (U), (R))

#define _mm_rsqrt28_round_sd(A, B, R) \
  __builtin_ia32_rsqrt28sd_round ((B), (A), (R))

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \
  __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \
  __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
				       (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R) \
  __builtin_ia32_rsqrt28ss_round ((B), (A), (R))

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \
  __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \
  __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
				       (U), (R))
|
||||
|
||||
#endif
|
||||
|
||||
/* Convenience forms without an explicit rounding argument: each expands to
   the corresponding *_round_* intrinsic (or, for the four unmasked scalar
   forms at the end, directly to the builtin with B first and A second),
   passing _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_rcp28_sd(W, U, A, B) \
  _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B) \
  _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B) \
  _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B) \
  _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B) \
  _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B) \
  _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B) \
  _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B) \
  _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
  _mm512_mask_exp2a23_round_pd ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
  _mm512_maskz_exp2a23_round_pd ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
  _mm512_mask_exp2a23_round_ps ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
  _mm512_maskz_exp2a23_round_ps ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
  _mm512_mask_rcp28_round_pd ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
  _mm512_maskz_rcp28_round_pd ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
  _mm512_mask_rcp28_round_ps ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
  _mm512_maskz_rcp28_round_ps ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
  _mm512_mask_rsqrt28_round_pd ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
  _mm512_maskz_rsqrt28_round_pd ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
  _mm512_mask_rsqrt28_round_ps ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
  _mm512_maskz_rsqrt28_round_ps ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_sd(A, B) \
  __builtin_ia32_rcp28sd_round ((B), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
  __builtin_ia32_rcp28ss_round ((B), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
  __builtin_ia32_rsqrt28sd_round ((B), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
  __builtin_ia32_rsqrt28ss_round ((B), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#ifdef __DISABLE_AVX512ER__
|
||||
#undef __DISABLE_AVX512ER__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512ER__ */
|
||||
|
||||
#endif /* _AVX512ERINTRIN_H_INCLUDED */
|
||||
#error "AVX512ER support has been removed since GCC 15."
|
||||
|
|
|
@ -21,249 +21,5 @@
|
|||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#error "AVX512PF support has been removed since GCC 15."
|
||||
|
||||
#ifndef _AVX512PFINTRIN_H_INCLUDED
|
||||
#define _AVX512PFINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512PF__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512pf,evex512")
|
||||
#define __DISABLE_AVX512PF__
|
||||
#endif /* __AVX512PF__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
|
||||
typedef int __v16si __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m256i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
#else
|
||||
/* Macro fallbacks for the prefetch gather/scatter intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands directly to the matching
   builtin after casting its arguments.

   The gather forms cast ADDR to `void const *', matching the inline
   prototypes (which take `void const *__addr'); the i64 gather macros
   previously cast to plain `void *', silently dropping const from the
   caller's pointer.  The scatter forms keep `void *', as in their inline
   prototypes.  */
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si)(__m256i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si)(__m512i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
			      (void const *) (ADDR), (int) (SCALE), \
			      (int) (HINT))

#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si)(__m256i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, \
			       (__v16si)(__m512i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdps ((__mmask16) (MASK), \
			       (__v16si)(__m512i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))

#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AVX512PF__
|
||||
#undef __DISABLE_AVX512PF__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512PF__ */
|
||||
|
||||
#endif /* _AVX512PFINTRIN_H_INCLUDED */
|
||||
|
|
|
@ -88,15 +88,12 @@
|
|||
#define bit_AVX512IFMA (1 << 21)
|
||||
#define bit_CLFLUSHOPT (1 << 23)
|
||||
#define bit_CLWB (1 << 24)
|
||||
#define bit_AVX512PF (1 << 26)
|
||||
#define bit_AVX512ER (1 << 27)
|
||||
#define bit_AVX512CD (1 << 28)
|
||||
#define bit_SHA (1 << 29)
|
||||
#define bit_AVX512BW (1 << 30)
|
||||
#define bit_AVX512VL (1u << 31)
|
||||
|
||||
/* %ecx */
|
||||
#define bit_PREFETCHWT1 (1 << 0)
|
||||
#define bit_AVX512VBMI (1 << 1)
|
||||
#define bit_PKU (1 << 3)
|
||||
#define bit_OSPKE (1 << 4)
|
||||
|
@ -117,8 +114,6 @@
|
|||
#define bit_ENQCMD (1 << 29)
|
||||
|
||||
/* %edx */
|
||||
#define bit_AVX5124VNNIW (1 << 2)
|
||||
#define bit_AVX5124FMAPS (1 << 3)
|
||||
#define bit_UINTR (1 << 5)
|
||||
#define bit_AVX512VP2INTERSECT (1 << 8)
|
||||
#define bit_SERIALIZE (1 << 14)
|
||||
|
|
|
@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "diagnostic.h"
|
||||
|
||||
const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
|
||||
|
@ -646,12 +647,13 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
/* Assume Cannon Lake. */
|
||||
else if (has_feature (FEATURE_AVX512VBMI))
|
||||
cpu = "cannonlake";
|
||||
/* Assume Knights Mill. */
|
||||
else if (has_feature (FEATURE_AVX5124VNNIW))
|
||||
cpu = "knm";
|
||||
/* Assume Knights Landing. */
|
||||
else if (has_feature (FEATURE_AVX512ER))
|
||||
cpu = "knl";
|
||||
/* Assume Xeon Phi Processors. Support has been removed
|
||||
since GCC 15. */
|
||||
else if (!has_feature (FEATURE_AVX512VL))
|
||||
error ("Xeon Phi ISA support has been removed since "
|
||||
"GCC 15, use GCC 14 for the Xeon Phi ISAs or "
|
||||
"%<-march=broadwell%> for all the other ISAs "
|
||||
"supported on this machine.");
|
||||
/* Assume Skylake with AVX-512. */
|
||||
else
|
||||
cpu = "skylake-avx512";
|
||||
|
@ -901,11 +903,6 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
|||
avoid unnecessary warnings when building libraries. */
|
||||
else if (isa_names_table[i].feature != FEATURE_AVX10_1_256
|
||||
&& isa_names_table[i].feature != FEATURE_AVX10_1_512
|
||||
&& isa_names_table[i].feature != FEATURE_AVX512PF
|
||||
&& isa_names_table[i].feature != FEATURE_AVX512ER
|
||||
&& isa_names_table[i].feature != FEATURE_AVX5124FMAPS
|
||||
&& isa_names_table[i].feature != FEATURE_AVX5124VNNIW
|
||||
&& isa_names_table[i].feature != FEATURE_PREFETCHWT1
|
||||
&& check_avx512_features (cpu_model, cpu_features2,
|
||||
isa_names_table[i].feature))
|
||||
options = concat (options, neg_option,
|
||||
|
|
|
@ -564,15 +564,6 @@ DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
|
|||
DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED, UNSIGNED)
|
||||
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)
|
||||
|
||||
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, V16SF, V16SF, PCV4SF, V16SF, UHI)
|
||||
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, V16SF, V16SF, PCV4SF)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, V4SF, V4SF, PCV4SF)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, V4SF, V4SF, PCV4SF, V4SF, UQI)
|
||||
|
||||
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, V16SI, V16SI, PCV4SI, V16SI, UHI)
|
||||
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, V16SI, V16SI, PCV4SI)
|
||||
|
||||
|
||||
# Instructions returning mask
|
||||
DEF_FUNCTION_TYPE (UCHAR, UQI, UQI, PUCHAR)
|
||||
DEF_FUNCTION_TYPE (UCHAR, UQI, UQI)
|
||||
|
@ -1147,9 +1138,6 @@ DEF_FUNCTION_TYPE (VOID, PVOID, QI, V4DI, V4SI, INT)
|
|||
DEF_FUNCTION_TYPE (VOID, PVOID, QI, V2DI, V4SI, INT)
|
||||
DEF_FUNCTION_TYPE (VOID, PVOID, QI, V4DI, V4DI, INT)
|
||||
DEF_FUNCTION_TYPE (VOID, PVOID, QI, V2DI, V2DI, INT)
|
||||
DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCVOID, INT, INT)
|
||||
DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCVOID, INT, INT)
|
||||
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCVOID, INT, INT)
|
||||
DEF_FUNCTION_TYPE (VOID, PUDI, V8HI, UQI)
|
||||
DEF_FUNCTION_TYPE (VOID, PV16QI, V16HI, UHI)
|
||||
|
||||
|
|
|
@ -1571,7 +1571,6 @@ BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv16sf
|
|||
BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
|
||||
BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
|
||||
BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
|
||||
|
@ -2813,20 +2812,6 @@ BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512vl
|
|||
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI)
|
||||
|
||||
/* AVX512_4FMAPS and AVX512_4VNNIW builtins with variable number of arguments. Defined in additional ix86_isa_flags2. */
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddps_mask, "__builtin_ia32_4fmaddps_mask", IX86_BUILTIN_4FMAPS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF_V16SF_UHI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddps, "__builtin_ia32_4fmaddps", IX86_BUILTIN_4FMAPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddss, "__builtin_ia32_4fmaddss", IX86_BUILTIN_4FMASS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddss_mask, "__builtin_ia32_4fmaddss_mask", IX86_BUILTIN_4FMASS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF_V4SF_UQI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddps_mask, "__builtin_ia32_4fnmaddps_mask", IX86_BUILTIN_4FNMAPS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF_V16SF_UHI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddps, "__builtin_ia32_4fnmaddps", IX86_BUILTIN_4FNMAPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddss, "__builtin_ia32_4fnmaddss", IX86_BUILTIN_4FNMASS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddss_mask, "__builtin_ia32_4fnmaddss_mask", IX86_BUILTIN_4FNMASS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF_V4SF_UQI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssd, "__builtin_ia32_vp4dpwssd", IX86_BUILTIN_4DPWSSD, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssd_mask, "__builtin_ia32_vp4dpwssd_mask", IX86_BUILTIN_4DPWSSD_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI_V16SI_UHI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssds, "__builtin_ia32_vp4dpwssds", IX86_BUILTIN_4DPWSSDS, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssds_mask, "__builtin_ia32_vp4dpwssds_mask", IX86_BUILTIN_4DPWSSDS_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI_V16SI_UHI)
|
||||
|
||||
/* RDPID. */
|
||||
BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
|
||||
|
||||
|
@ -3194,22 +3179,6 @@ BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsu
|
|||
BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||
|
||||
/* AVX512ER */
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v2df_mask_round, "__builtin_ia32_rcp28sd_mask_round", IX86_BUILTIN_RCP28SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v4sf_mask_round, "__builtin_ia32_rcp28ss_mask_round", IX86_BUILTIN_RCP28SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v2df_mask_round, "__builtin_ia32_rsqrt28sd_mask_round", IX86_BUILTIN_RSQRT28SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v4sf_mask_round, "__builtin_ia32_rsqrt28ss_mask_round", IX86_BUILTIN_RSQRT28SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
|
||||
|
||||
/* AVX512DQ. */
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
|
||||
|
|
|
@ -1095,32 +1095,6 @@ ix86_init_mmx_sse_builtins (void)
|
|||
VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
|
||||
IX86_BUILTIN_SCATTERALTDIV4SI);
|
||||
|
||||
/* AVX512PF */
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd",
|
||||
VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_GATHERPFDPD);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps",
|
||||
VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_GATHERPFDPS);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd",
|
||||
VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_GATHERPFQPD);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps",
|
||||
VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_GATHERPFQPS);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd",
|
||||
VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_SCATTERPFDPD);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps",
|
||||
VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_SCATTERPFDPS);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd",
|
||||
VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_SCATTERPFQPD);
|
||||
def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps",
|
||||
VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
|
||||
IX86_BUILTIN_SCATTERPFQPS);
|
||||
|
||||
/* SHA */
|
||||
def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1",
|
||||
V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
|
||||
|
|
|
@ -222,13 +222,6 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__clearwaterforest");
|
||||
def_or_undef (parse_in, "__clearwaterforest__");
|
||||
break;
|
||||
case PROCESSOR_KNL:
|
||||
def_or_undef (parse_in, "__knl");
|
||||
def_or_undef (parse_in, "__knl__");
|
||||
break;
|
||||
case PROCESSOR_KNM:
|
||||
def_or_undef (parse_in, "__knm");
|
||||
def_or_undef (parse_in, "__knm__");
|
||||
break;
|
||||
case PROCESSOR_SKYLAKE:
|
||||
def_or_undef (parse_in, "__skylake");
|
||||
|
@ -440,12 +433,6 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
case PROCESSOR_CLEARWATERFOREST:
|
||||
def_or_undef (parse_in, "__tune_clearwaterforest__");
|
||||
break;
|
||||
case PROCESSOR_KNL:
|
||||
def_or_undef (parse_in, "__tune_knl__");
|
||||
break;
|
||||
case PROCESSOR_KNM:
|
||||
def_or_undef (parse_in, "__tune_knm__");
|
||||
break;
|
||||
case PROCESSOR_SKYLAKE:
|
||||
def_or_undef (parse_in, "__tune_skylake__");
|
||||
break;
|
||||
|
@ -563,12 +550,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__AVX2__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512F)
|
||||
def_or_undef (parse_in, "__AVX512F__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512ER)
|
||||
def_or_undef (parse_in, "__AVX512ER__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512CD)
|
||||
def_or_undef (parse_in, "__AVX512CD__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512PF)
|
||||
def_or_undef (parse_in, "__AVX512PF__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512DQ)
|
||||
def_or_undef (parse_in, "__AVX512DQ__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512BW)
|
||||
|
@ -582,8 +565,6 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__AVX512VBMI__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512IFMA)
|
||||
def_or_undef (parse_in, "__AVX512IFMA__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVX5124VNNIW)
|
||||
def_or_undef (parse_in, "__AVX5124VNNIW__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512VBMI2)
|
||||
def_or_undef (parse_in, "__AVX512VBMI2__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512VNNI)
|
||||
|
@ -592,8 +573,6 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__PCONFIG__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_SGX)
|
||||
def_or_undef (parse_in, "__SGX__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVX5124FMAPS)
|
||||
def_or_undef (parse_in, "__AVX5124FMAPS__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512BITALG)
|
||||
def_or_undef (parse_in, "__AVX512BITALG__");
|
||||
if (isa_flag & OPTION_MASK_ISA_AVX512VPOPCNTDQ)
|
||||
|
@ -642,8 +621,6 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|||
def_or_undef (parse_in, "__XSAVE__");
|
||||
if (isa_flag & OPTION_MASK_ISA_XSAVEOPT)
|
||||
def_or_undef (parse_in, "__XSAVEOPT__");
|
||||
if (isa_flag & OPTION_MASK_ISA_PREFETCHWT1)
|
||||
def_or_undef (parse_in, "__PREFETCHWT1__");
|
||||
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
|
||||
def_or_undef (parse_in, "__SSE_MATH__");
|
||||
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
|
||||
|
|
|
@ -13648,7 +13648,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
|
|||
}
|
||||
|
||||
if (TARGET_3DNOW || TARGET_PREFETCH_SSE
|
||||
|| TARGET_PRFCHW || TARGET_PREFETCHWT1)
|
||||
|| TARGET_PRFCHW)
|
||||
emit_insn (gen_prefetch (op0, op1, op2));
|
||||
else if (!MEM_P (op0) && side_effects_p (op0))
|
||||
/* Don't do anything with direct references to volatile memory,
|
||||
|
@ -14836,9 +14836,6 @@ rdseed_step:
|
|||
case IX86_BUILTIN_SCATTERDIV2DI:
|
||||
icode = CODE_FOR_avx512vl_scatterdiv2di;
|
||||
goto scatter_gen;
|
||||
case IX86_BUILTIN_GATHERPFDPD:
|
||||
icode = CODE_FOR_avx512pf_gatherpfv8sidf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_SCATTERALTSIV8DF:
|
||||
icode = CODE_FOR_avx512f_scattersiv8df;
|
||||
goto scatter_gen;
|
||||
|
@ -14875,27 +14872,6 @@ rdseed_step:
|
|||
case IX86_BUILTIN_SCATTERALTDIV4SI:
|
||||
icode = CODE_FOR_avx512vl_scatterdiv4si;
|
||||
goto scatter_gen;
|
||||
case IX86_BUILTIN_GATHERPFDPS:
|
||||
icode = CODE_FOR_avx512pf_gatherpfv16sisf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_GATHERPFQPD:
|
||||
icode = CODE_FOR_avx512pf_gatherpfv8didf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_GATHERPFQPS:
|
||||
icode = CODE_FOR_avx512pf_gatherpfv8disf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_SCATTERPFDPD:
|
||||
icode = CODE_FOR_avx512pf_scatterpfv8sidf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_SCATTERPFDPS:
|
||||
icode = CODE_FOR_avx512pf_scatterpfv16sisf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_SCATTERPFQPD:
|
||||
icode = CODE_FOR_avx512pf_scatterpfv8didf;
|
||||
goto vec_prefetch_gen;
|
||||
case IX86_BUILTIN_SCATTERPFQPS:
|
||||
icode = CODE_FOR_avx512pf_scatterpfv8disf;
|
||||
goto vec_prefetch_gen;
|
||||
|
||||
gather_gen:
|
||||
rtx half;
|
||||
|
@ -15233,66 +15209,6 @@ rdseed_step:
|
|||
emit_insn (pat);
|
||||
return 0;
|
||||
|
||||
vec_prefetch_gen:
|
||||
arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
arg2 = CALL_EXPR_ARG (exp, 2);
|
||||
arg3 = CALL_EXPR_ARG (exp, 3);
|
||||
arg4 = CALL_EXPR_ARG (exp, 4);
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
op3 = expand_normal (arg3);
|
||||
op4 = expand_normal (arg4);
|
||||
mode0 = insn_data[icode].operand[0].mode;
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
mode3 = insn_data[icode].operand[3].mode;
|
||||
mode4 = insn_data[icode].operand[4].mode;
|
||||
|
||||
op0 = fixup_modeless_constant (op0, mode0);
|
||||
|
||||
if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
|
||||
{
|
||||
if (!insn_data[icode].operand[0].predicate (op0, mode0))
|
||||
op0 = copy_to_mode_reg (mode0, op0);
|
||||
}
|
||||
else
|
||||
{
|
||||
op0 = copy_to_reg (op0);
|
||||
op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
|
||||
}
|
||||
|
||||
if (!insn_data[icode].operand[1].predicate (op1, mode1))
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
|
||||
/* Force memory operand only with base register here. But we
|
||||
don't want to do it on memory operand for other builtin
|
||||
functions. */
|
||||
op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
|
||||
|
||||
if (!insn_data[icode].operand[2].predicate (op2, Pmode))
|
||||
op2 = copy_to_mode_reg (Pmode, op2);
|
||||
|
||||
if (!insn_data[icode].operand[3].predicate (op3, mode3))
|
||||
{
|
||||
error ("the forth argument must be scale 1, 2, 4, 8");
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
if (!insn_data[icode].operand[4].predicate (op4, mode4))
|
||||
{
|
||||
error ("incorrect hint operand");
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
|
||||
if (! pat)
|
||||
return const0_rtx;
|
||||
|
||||
emit_insn (pat);
|
||||
|
||||
return 0;
|
||||
|
||||
case IX86_BUILTIN_XABORT:
|
||||
icode = CODE_FOR_xabort;
|
||||
arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
|
@ -15410,229 +15326,9 @@ rdseed_step:
|
|||
&& fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
|
||||
{
|
||||
i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
|
||||
rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
|
||||
rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
|
||||
rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
|
||||
int masked = 1;
|
||||
machine_mode mode, wide_mode, nar_mode;
|
||||
|
||||
nar_mode = V4SFmode;
|
||||
mode = V16SFmode;
|
||||
wide_mode = V64SFmode;
|
||||
fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
|
||||
fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
|
||||
|
||||
switch (fcode)
|
||||
{
|
||||
case IX86_BUILTIN_4FMAPS:
|
||||
fcn = gen_avx5124fmaddps_4fmaddps;
|
||||
masked = 0;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4DPWSSD:
|
||||
nar_mode = V4SImode;
|
||||
mode = V16SImode;
|
||||
wide_mode = V64SImode;
|
||||
fcn = gen_avx5124vnniw_vp4dpwssd;
|
||||
masked = 0;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4DPWSSDS:
|
||||
nar_mode = V4SImode;
|
||||
mode = V16SImode;
|
||||
wide_mode = V64SImode;
|
||||
fcn = gen_avx5124vnniw_vp4dpwssds;
|
||||
masked = 0;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4FNMAPS:
|
||||
fcn = gen_avx5124fmaddps_4fnmaddps;
|
||||
masked = 0;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4FNMAPS_MASK:
|
||||
fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
|
||||
fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4DPWSSD_MASK:
|
||||
nar_mode = V4SImode;
|
||||
mode = V16SImode;
|
||||
wide_mode = V64SImode;
|
||||
fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
|
||||
fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4DPWSSDS_MASK:
|
||||
nar_mode = V4SImode;
|
||||
mode = V16SImode;
|
||||
wide_mode = V64SImode;
|
||||
fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
|
||||
fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
|
||||
goto v4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4FMAPS_MASK:
|
||||
{
|
||||
tree args[4];
|
||||
rtx ops[4];
|
||||
rtx wide_reg;
|
||||
rtx accum;
|
||||
rtx addr;
|
||||
rtx mem;
|
||||
|
||||
v4fma_expand:
|
||||
wide_reg = gen_reg_rtx (wide_mode);
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
args[i] = CALL_EXPR_ARG (exp, i);
|
||||
ops[i] = expand_normal (args[i]);
|
||||
|
||||
emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
|
||||
ops[i]);
|
||||
}
|
||||
|
||||
accum = expand_normal (CALL_EXPR_ARG (exp, 4));
|
||||
accum = force_reg (mode, accum);
|
||||
|
||||
addr = expand_normal (CALL_EXPR_ARG (exp, 5));
|
||||
addr = force_reg (Pmode, addr);
|
||||
|
||||
mem = gen_rtx_MEM (nar_mode, addr);
|
||||
|
||||
target = gen_reg_rtx (mode);
|
||||
|
||||
emit_move_insn (target, accum);
|
||||
|
||||
if (! masked)
|
||||
emit_insn (fcn (target, accum, wide_reg, mem));
|
||||
else
|
||||
{
|
||||
rtx merge, mask;
|
||||
merge = expand_normal (CALL_EXPR_ARG (exp, 6));
|
||||
|
||||
mask = expand_normal (CALL_EXPR_ARG (exp, 7));
|
||||
|
||||
if (CONST_INT_P (mask))
|
||||
mask = fixup_modeless_constant (mask, HImode);
|
||||
|
||||
mask = force_reg (HImode, mask);
|
||||
|
||||
if (GET_MODE (mask) != HImode)
|
||||
mask = gen_rtx_SUBREG (HImode, mask, 0);
|
||||
|
||||
/* If merge is 0 then we're about to emit z-masked variant. */
|
||||
if (const0_operand (merge, mode))
|
||||
emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
|
||||
/* If merge is the same as accum then emit merge-masked variant. */
|
||||
else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
|
||||
{
|
||||
merge = force_reg (mode, merge);
|
||||
emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
|
||||
}
|
||||
/* Merge with something unknown might happen if we z-mask w/ -O0. */
|
||||
else
|
||||
{
|
||||
target = gen_reg_rtx (mode);
|
||||
emit_move_insn (target, merge);
|
||||
emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
|
||||
}
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
case IX86_BUILTIN_4FNMASS:
|
||||
fcn = gen_avx5124fmaddps_4fnmaddss;
|
||||
masked = 0;
|
||||
goto s4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4FMASS:
|
||||
fcn = gen_avx5124fmaddps_4fmaddss;
|
||||
masked = 0;
|
||||
goto s4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4FNMASS_MASK:
|
||||
fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
|
||||
fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
|
||||
goto s4fma_expand;
|
||||
|
||||
case IX86_BUILTIN_4FMASS_MASK:
|
||||
{
|
||||
tree args[4];
|
||||
rtx ops[4];
|
||||
rtx wide_reg;
|
||||
rtx accum;
|
||||
rtx addr;
|
||||
rtx mem;
|
||||
|
||||
fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
|
||||
fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
|
||||
|
||||
s4fma_expand:
|
||||
mode = V4SFmode;
|
||||
wide_reg = gen_reg_rtx (V64SFmode);
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
rtx tmp;
|
||||
args[i] = CALL_EXPR_ARG (exp, i);
|
||||
ops[i] = expand_normal (args[i]);
|
||||
|
||||
tmp = gen_reg_rtx (SFmode);
|
||||
emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
|
||||
|
||||
emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
|
||||
gen_rtx_SUBREG (V16SFmode, tmp, 0));
|
||||
}
|
||||
|
||||
accum = expand_normal (CALL_EXPR_ARG (exp, 4));
|
||||
accum = force_reg (V4SFmode, accum);
|
||||
|
||||
addr = expand_normal (CALL_EXPR_ARG (exp, 5));
|
||||
addr = force_reg (Pmode, addr);
|
||||
|
||||
mem = gen_rtx_MEM (V4SFmode, addr);
|
||||
|
||||
target = gen_reg_rtx (V4SFmode);
|
||||
|
||||
emit_move_insn (target, accum);
|
||||
|
||||
if (! masked)
|
||||
emit_insn (fcn (target, accum, wide_reg, mem));
|
||||
else
|
||||
{
|
||||
rtx merge, mask;
|
||||
merge = expand_normal (CALL_EXPR_ARG (exp, 6));
|
||||
|
||||
mask = expand_normal (CALL_EXPR_ARG (exp, 7));
|
||||
|
||||
if (CONST_INT_P (mask))
|
||||
mask = fixup_modeless_constant (mask, QImode);
|
||||
|
||||
mask = force_reg (QImode, mask);
|
||||
|
||||
if (GET_MODE (mask) != QImode)
|
||||
mask = gen_rtx_SUBREG (QImode, mask, 0);
|
||||
|
||||
/* If merge is 0 then we're about to emit z-masked variant. */
|
||||
if (const0_operand (merge, mode))
|
||||
emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
|
||||
/* If merge is the same as accum then emit merge-masked
|
||||
variant. */
|
||||
else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
|
||||
{
|
||||
merge = force_reg (mode, merge);
|
||||
emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
|
||||
}
|
||||
/* Merge with something unknown might happen if we z-mask
|
||||
w/ -O0. */
|
||||
else
|
||||
{
|
||||
target = gen_reg_rtx (mode);
|
||||
emit_move_insn (target, merge);
|
||||
emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
|
||||
}
|
||||
}
|
||||
return target;
|
||||
}
|
||||
case IX86_BUILTIN_RDPID:
|
||||
return ix86_expand_special_args_builtin (bdesc_args + i, exp,
|
||||
target);
|
||||
|
@ -18977,17 +18673,8 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
|
|||
/* x0 = rcp(b) estimate */
|
||||
if (mode == V16SFmode || mode == V8DFmode)
|
||||
{
|
||||
if (TARGET_AVX512ER)
|
||||
{
|
||||
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
|
||||
UNSPEC_RCP28)));
|
||||
/* res = a * x0 */
|
||||
emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
|
||||
return;
|
||||
}
|
||||
else
|
||||
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
|
||||
UNSPEC_RCP14)));
|
||||
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
|
||||
UNSPEC_RCP14)));
|
||||
}
|
||||
else
|
||||
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
|
||||
|
@ -19025,24 +18712,6 @@ ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
|
|||
e2 = gen_reg_rtx (mode);
|
||||
e3 = gen_reg_rtx (mode);
|
||||
|
||||
if (TARGET_AVX512ER && mode == V16SFmode)
|
||||
{
|
||||
if (recip)
|
||||
/* res = rsqrt28(a) estimate */
|
||||
emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
|
||||
UNSPEC_RSQRT28)));
|
||||
else
|
||||
{
|
||||
/* x0 = rsqrt28(a) estimate */
|
||||
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
|
||||
UNSPEC_RSQRT28)));
|
||||
/* res = rcp28(x0) estimate */
|
||||
emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
|
||||
UNSPEC_RCP28)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
real_from_integer (&r, VOIDmode, -3, SIGNED);
|
||||
mthree = const_double_from_real_value (r, SFmode);
|
||||
|
||||
|
|
|
@ -58,12 +58,9 @@ DEF_PTA(FXSR)
|
|||
DEF_PTA(XSAVE)
|
||||
DEF_PTA(XSAVEOPT)
|
||||
DEF_PTA(AVX512F)
|
||||
DEF_PTA(AVX512ER)
|
||||
DEF_PTA(AVX512PF)
|
||||
DEF_PTA(AVX512CD)
|
||||
DEF_PTA(NO_TUNE)
|
||||
DEF_PTA(SHA)
|
||||
DEF_PTA(PREFETCHWT1)
|
||||
DEF_PTA(CLFLUSHOPT)
|
||||
DEF_PTA(XSAVEC)
|
||||
DEF_PTA(XSAVES)
|
||||
|
@ -77,8 +74,6 @@ DEF_PTA(MWAITX)
|
|||
DEF_PTA(CLZERO)
|
||||
DEF_PTA(NO_80387)
|
||||
DEF_PTA(PKU)
|
||||
DEF_PTA(AVX5124VNNIW)
|
||||
DEF_PTA(AVX5124FMAPS)
|
||||
DEF_PTA(AVX512VPOPCNTDQ)
|
||||
DEF_PTA(SGX)
|
||||
DEF_PTA(AVX512VNNI)
|
||||
|
|
|
@ -115,8 +115,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
#define m_HASWELL (HOST_WIDE_INT_1U<<PROCESSOR_HASWELL)
|
||||
#define m_BONNELL (HOST_WIDE_INT_1U<<PROCESSOR_BONNELL)
|
||||
#define m_SILVERMONT (HOST_WIDE_INT_1U<<PROCESSOR_SILVERMONT)
|
||||
#define m_KNL (HOST_WIDE_INT_1U<<PROCESSOR_KNL)
|
||||
#define m_KNM (HOST_WIDE_INT_1U<<PROCESSOR_KNM)
|
||||
#define m_SKYLAKE (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE)
|
||||
#define m_SKYLAKE_AVX512 (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE_AVX512)
|
||||
#define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE)
|
||||
|
@ -224,8 +222,6 @@ static struct ix86_target_opts isa2_opts[] =
|
|||
{ "-mwbnoinvd", OPTION_MASK_ISA2_WBNOINVD },
|
||||
{ "-mavx512vp2intersect", OPTION_MASK_ISA2_AVX512VP2INTERSECT },
|
||||
{ "-msgx", OPTION_MASK_ISA2_SGX },
|
||||
{ "-mavx5124vnniw", OPTION_MASK_ISA2_AVX5124VNNIW },
|
||||
{ "-mavx5124fmaps", OPTION_MASK_ISA2_AVX5124FMAPS },
|
||||
{ "-mhle", OPTION_MASK_ISA2_HLE },
|
||||
{ "-mmovbe", OPTION_MASK_ISA2_MOVBE },
|
||||
{ "-mclzero", OPTION_MASK_ISA2_CLZERO },
|
||||
|
@ -278,8 +274,6 @@ static struct ix86_target_opts isa_opts[] =
|
|||
{ "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
|
||||
{ "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
|
||||
{ "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
|
||||
{ "-mavx512er", OPTION_MASK_ISA_AVX512ER },
|
||||
{ "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
|
||||
{ "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
|
||||
{ "-mavx512f", OPTION_MASK_ISA_AVX512F },
|
||||
{ "-mavx2", OPTION_MASK_ISA_AVX2 },
|
||||
|
@ -306,7 +300,6 @@ static struct ix86_target_opts isa_opts[] =
|
|||
{ "-mprfchw", OPTION_MASK_ISA_PRFCHW },
|
||||
{ "-mrdseed", OPTION_MASK_ISA_RDSEED },
|
||||
{ "-madx", OPTION_MASK_ISA_ADX },
|
||||
{ "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
|
||||
{ "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
|
||||
{ "-mxsaves", OPTION_MASK_ISA_XSAVES },
|
||||
{ "-mxsavec", OPTION_MASK_ISA_XSAVEC },
|
||||
|
@ -781,8 +774,6 @@ static const struct processor_costs *processor_cost_table[] =
|
|||
&alderlake_cost,
|
||||
&alderlake_cost,
|
||||
&alderlake_cost,
|
||||
&slm_cost,
|
||||
&slm_cost,
|
||||
&skylake_cost,
|
||||
&skylake_cost,
|
||||
&icelake_cost,
|
||||
|
@ -1030,8 +1021,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
|||
IX86_ATTR_ISA ("pconfig", OPT_mpconfig),
|
||||
IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd),
|
||||
IX86_ATTR_ISA ("sgx", OPT_msgx),
|
||||
IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
|
||||
IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
|
||||
IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
|
||||
IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
|
||||
IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
|
||||
|
@ -1043,8 +1032,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
|||
IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
|
||||
IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
|
||||
IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
|
||||
IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
|
||||
IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
|
||||
IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
|
||||
IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
|
||||
IX86_ATTR_ISA ("avx2", OPT_mavx2),
|
||||
|
@ -1071,7 +1058,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
|||
IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
|
||||
IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
|
||||
IX86_ATTR_ISA ("adx", OPT_madx),
|
||||
IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
|
||||
IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
|
||||
IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
|
||||
IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
|
||||
|
@ -2103,18 +2089,6 @@ ix86_option_override_internal (bool main_args_p,
|
|||
: G_("%<target(\"tune=x86-64\")%> is deprecated; use "
|
||||
"%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
|
||||
" instead as appropriate"));
|
||||
else if (!strcmp (opts->x_ix86_tune_string, "knl"))
|
||||
warning (OPT_Wdeprecated,
|
||||
main_args_p
|
||||
? G_("%<-mtune=knl%> support will be removed in GCC 15")
|
||||
: G_("%<target(\"tune=knl\")%> support will be removed in "
|
||||
"GCC 15"));
|
||||
else if (!strcmp (opts->x_ix86_tune_string, "knm"))
|
||||
warning (OPT_Wdeprecated,
|
||||
main_args_p
|
||||
? G_("%<-mtune=knm%> support will be removed in GCC 15")
|
||||
: G_("%<target(\"tune=knm\")%> support will be removed in "
|
||||
"GCC 15"));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2326,19 +2300,6 @@ ix86_option_override_internal (bool main_args_p,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!strcmp (opts->x_ix86_arch_string, "knl"))
|
||||
warning (OPT_Wdeprecated,
|
||||
main_args_p
|
||||
? G_("%<-march=knl%> support will be removed in GCC 15")
|
||||
: G_("%<target(\"arch=knl\")%> support will be removed in "
|
||||
"GCC 15"));
|
||||
else if (!strcmp (opts->x_ix86_arch_string, "knm"))
|
||||
warning (OPT_Wdeprecated,
|
||||
main_args_p
|
||||
? G_("%<-march=knm%> support will be removed in GCC 15")
|
||||
: G_("%<target(\"arch=knm\")%> support will be removed in "
|
||||
"GCC 15"));
|
||||
|
||||
ix86_schedule = processor_alias_table[i].schedule;
|
||||
ix86_arch = processor_alias_table[i].processor;
|
||||
|
||||
|
@ -2631,8 +2592,7 @@ ix86_option_override_internal (bool main_args_p,
|
|||
/* Enable SSE prefetch. */
|
||||
if (TARGET_SSE_P (opts->x_ix86_isa_flags)
|
||||
|| (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
|
||||
&& !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
|
||||
|| TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
|
||||
&& !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
|
||||
ix86_prefetch_sse = true;
|
||||
|
||||
/* Enable mwait/monitor instructions for -msse3. */
|
||||
|
@ -2757,15 +2717,6 @@ ix86_option_override_internal (bool main_args_p,
|
|||
opts->x_ix86_isa_flags2 |= avx512_isa_flags2;
|
||||
}
|
||||
|
||||
/* Disable AVX512{PF,ER,4VNNIW,4FAMPS} for -mno-evex512. */
|
||||
if (!TARGET_EVEX512_P(opts->x_ix86_isa_flags2))
|
||||
{
|
||||
opts->x_ix86_isa_flags
|
||||
&= ~(OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512ER);
|
||||
opts->x_ix86_isa_flags2
|
||||
&= ~(OPTION_MASK_ISA2_AVX5124FMAPS | OPTION_MASK_ISA2_AVX5124VNNIW);
|
||||
}
|
||||
|
||||
/* Validate -mpreferred-stack-boundary= value or default it to
|
||||
PREFERRED_STACK_BOUNDARY_DEFAULT. */
|
||||
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
|
||||
|
@ -2982,7 +2933,6 @@ ix86_option_override_internal (bool main_args_p,
|
|||
sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
|
||||
|
||||
if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
|
||||
&& TARGET_EMIT_VZEROUPPER
|
||||
&& flag_expensive_optimizations
|
||||
&& !optimize_size)
|
||||
opts->x_target_flags |= MASK_VZEROUPPER;
|
||||
|
|
|
@ -59,12 +59,8 @@ ix86_rust_target_cpu_info (void)
|
|||
rust_add_target_info ("target_feature", "avx2");
|
||||
if (TARGET_AVX512F)
|
||||
rust_add_target_info ("target_feature", "avx512f");
|
||||
if (TARGET_AVX512ER)
|
||||
rust_add_target_info ("target_feature", "avx512er");
|
||||
if (TARGET_AVX512CD)
|
||||
rust_add_target_info ("target_feature", "avx512cd");
|
||||
if (TARGET_AVX512PF)
|
||||
rust_add_target_info ("target_feature", "avx512pf");
|
||||
if (TARGET_AVX512DQ)
|
||||
rust_add_target_info ("target_feature", "avx512dq");
|
||||
if (TARGET_AVX512BW)
|
||||
|
|
|
@ -20881,13 +20881,6 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
|||
|| VALID_AVX512F_SCALAR_MODE (mode)))
|
||||
return true;
|
||||
|
||||
/* For AVX-5124FMAPS or AVX-5124VNNIW
|
||||
allow V64SF and V64SI modes for special regnos. */
|
||||
if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
|
||||
&& (mode == V64SFmode || mode == V64SImode)
|
||||
&& MOD4_SSE_REGNO_P (regno))
|
||||
return true;
|
||||
|
||||
/* TODO check for QI/HI scalars. */
|
||||
/* AVX512VL allows sse regs16+ for 128/256 bit modes. */
|
||||
if (TARGET_AVX512VL
|
||||
|
|
|
@ -447,8 +447,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
|||
#define TARGET_ONE_IF_CONV_INSN \
|
||||
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
|
||||
#define TARGET_AVOID_MFENCE ix86_tune_features[X86_TUNE_AVOID_MFENCE]
|
||||
#define TARGET_EMIT_VZEROUPPER \
|
||||
ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
|
||||
#define TARGET_EXPAND_ABS \
|
||||
ix86_tune_features[X86_TUNE_EXPAND_ABS]
|
||||
#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \
|
||||
|
@ -2284,8 +2282,6 @@ enum processor_type
|
|||
PROCESSOR_SIERRAFOREST,
|
||||
PROCESSOR_GRANDRIDGE,
|
||||
PROCESSOR_CLEARWATERFOREST,
|
||||
PROCESSOR_KNL,
|
||||
PROCESSOR_KNM,
|
||||
PROCESSOR_SKYLAKE,
|
||||
PROCESSOR_SKYLAKE_AVX512,
|
||||
PROCESSOR_CANNONLAKE,
|
||||
|
@ -2395,8 +2391,6 @@ constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_ICELAKE_SERVER | PTA_MOVDIRI
|
|||
| PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG
|
||||
| PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16
|
||||
| PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 | PTA_AVX512BF16;
|
||||
constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF
|
||||
| PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1;
|
||||
constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
|
||||
constexpr wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE
|
||||
| PTA_RDRND | PTA_PRFCHW;
|
||||
|
@ -2426,8 +2420,6 @@ constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST
|
|||
| PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR
|
||||
| PTA_PREFETCHI;
|
||||
constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI;
|
||||
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
|
||||
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
|
||||
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
|
||||
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2
|
||||
|
|
|
@ -27045,7 +27045,7 @@
|
|||
[(prefetch (match_operand 0 "address_operand")
|
||||
(match_operand:SI 1 "const_int_operand")
|
||||
(match_operand:SI 2 "const_int_operand"))]
|
||||
"TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
|
||||
"TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW"
|
||||
{
|
||||
bool write = operands[1] != const0_rtx;
|
||||
int locality = INTVAL (operands[2]);
|
||||
|
@ -27059,9 +27059,7 @@
|
|||
|
||||
if (write)
|
||||
{
|
||||
if (TARGET_PREFETCHWT1)
|
||||
operands[2] = GEN_INT (MAX (locality, 2));
|
||||
else if (TARGET_PRFCHW)
|
||||
if (TARGET_PRFCHW)
|
||||
operands[2] = GEN_INT (3);
|
||||
else if (TARGET_3DNOW && !TARGET_SSE2)
|
||||
operands[2] = GEN_INT (3);
|
||||
|
@ -27110,7 +27108,7 @@
|
|||
[(prefetch (match_operand 0 "address_operand" "p")
|
||||
(match_operand:SI 1 "const_int_operand")
|
||||
(const_int 3))]
|
||||
"TARGET_3DNOW || TARGET_PRFCHW || TARGET_PREFETCHWT1"
|
||||
"TARGET_3DNOW || TARGET_PRFCHW"
|
||||
{
|
||||
if (operands[1] == const0_rtx)
|
||||
return "prefetch\t%a0";
|
||||
|
@ -27122,17 +27120,6 @@
|
|||
(symbol_ref "memory_address_length (operands[0], false)"))
|
||||
(set_attr "memory" "none")])
|
||||
|
||||
(define_insn "*prefetch_prefetchwt1"
|
||||
[(prefetch (match_operand 0 "address_operand" "p")
|
||||
(const_int 1)
|
||||
(const_int 2))]
|
||||
"TARGET_PREFETCHWT1"
|
||||
"prefetchwt1\t%a0";
|
||||
[(set_attr "type" "sse")
|
||||
(set (attr "length_address")
|
||||
(symbol_ref "memory_address_length (operands[0], false)"))
|
||||
(set_attr "memory" "none")])
|
||||
|
||||
(define_insn "prefetchi"
|
||||
[(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
|
||||
(match_operand:SI 1 "const_int_operand")]
|
||||
|
|
|
@ -741,14 +741,6 @@ mavx512f
|
|||
Target Mask(ISA_AVX512F) Var(ix86_isa_flags) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F built-in functions and code generation.
|
||||
|
||||
mavx512pf
|
||||
Target Mask(ISA_AVX512PF) Var(ix86_isa_flags) Save Warn(AVX512PF support will be removed in GCC 15)
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512PF built-in functions and code generation.
|
||||
|
||||
mavx512er
|
||||
Target Mask(ISA_AVX512ER) Var(ix86_isa_flags) Save Warn(AVX512ER support will be removed in GCC 15)
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512ER built-in functions and code generation.
|
||||
|
||||
mavx512cd
|
||||
Target Mask(ISA_AVX512CD) Var(ix86_isa_flags) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512CD built-in functions and code generation.
|
||||
|
@ -773,14 +765,6 @@ mavx512vbmi
|
|||
Target Mask(ISA_AVX512VBMI) Var(ix86_isa_flags) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VBMI built-in functions and code generation.
|
||||
|
||||
mavx5124fmaps
|
||||
Target Mask(ISA2_AVX5124FMAPS) Var(ix86_isa_flags2) Save Warn(AVX5124FMAPS support will be removed in GCC 15)
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX5124FMAPS built-in functions and code generation.
|
||||
|
||||
mavx5124vnniw
|
||||
Target Mask(ISA2_AVX5124VNNIW) Var(ix86_isa_flags2) Save Warn(AVX5124VNNIW support will be removed in GCC 15)
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX5124VNNIW built-in functions and code generation.
|
||||
|
||||
mavx512vpopcntdq
|
||||
Target Mask(ISA_AVX512VPOPCNTDQ) Var(ix86_isa_flags) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512VPOPCNTDQ built-in functions and code generation.
|
||||
|
@ -972,10 +956,6 @@ mf16c
|
|||
Target Mask(ISA_F16C) Var(ix86_isa_flags) Save
|
||||
Support F16C built-in functions and code generation.
|
||||
|
||||
mprefetchwt1
|
||||
Target Mask(ISA_PREFETCHWT1) Var(ix86_isa_flags) Save Warn(PREFETCHWT1 support will be removed in GCC 15)
|
||||
Support PREFETCHWT1 built-in functions and code generation.
|
||||
|
||||
mfentry
|
||||
Target Save Var(flag_fentry)
|
||||
Emit profiling counter call at function entry before prologue.
|
||||
|
|
|
@ -54,10 +54,6 @@
|
|||
|
||||
#include <avx512fintrin.h>
|
||||
|
||||
#include <avx512erintrin.h>
|
||||
|
||||
#include <avx512pfintrin.h>
|
||||
|
||||
#include <avx512cdintrin.h>
|
||||
|
||||
#include <avx512vlintrin.h>
|
||||
|
@ -78,10 +74,6 @@
|
|||
|
||||
#include <avx512vbmivlintrin.h>
|
||||
|
||||
#include <avx5124fmapsintrin.h>
|
||||
|
||||
#include <avx5124vnniwintrin.h>
|
||||
|
||||
#include <avx512vpopcntdqintrin.h>
|
||||
|
||||
#include <avx512vbmi2intrin.h>
|
||||
|
|
|
@ -109,15 +109,6 @@
|
|||
;; For embed. rounding feature
|
||||
UNSPEC_EMBEDDED_ROUNDING
|
||||
|
||||
;; For AVX512PF support
|
||||
UNSPEC_GATHER_PREFETCH
|
||||
UNSPEC_SCATTER_PREFETCH
|
||||
|
||||
;; For AVX512ER support
|
||||
UNSPEC_EXP2
|
||||
UNSPEC_RCP28
|
||||
UNSPEC_RSQRT28
|
||||
|
||||
;; For SHA support
|
||||
UNSPEC_SHA1MSG1
|
||||
UNSPEC_SHA1MSG2
|
||||
|
@ -147,12 +138,6 @@
|
|||
;; For AVX512VBMI support
|
||||
UNSPEC_VPMULTISHIFT
|
||||
|
||||
;; For AVX5124FMAPS/AVX5124VNNIW support
|
||||
UNSPEC_VP4FMADD
|
||||
UNSPEC_VP4FNMADD
|
||||
UNSPEC_VP4DPWSSD
|
||||
UNSPEC_VP4DPWSSDS
|
||||
|
||||
;; For GFNI support
|
||||
UNSPEC_GF2P8AFFINEINV
|
||||
UNSPEC_GF2P8AFFINE
|
||||
|
@ -458,10 +443,6 @@
|
|||
[(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
|
||||
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
|
||||
|
||||
;; AVX512ER SF plus 128- and 256-bit SF vector modes
|
||||
(define_mode_iterator VF1_AVX512ER_128_256
|
||||
[(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
|
||||
|
||||
(define_mode_iterator VFH_AVX512VL
|
||||
[(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
|
||||
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
|
||||
|
@ -2785,9 +2766,9 @@
|
|||
(set_attr "mode" "<ssescalarmode>")])
|
||||
|
||||
(define_expand "rsqrt<mode>2"
|
||||
[(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
|
||||
(unspec:VF1_AVX512ER_128_256
|
||||
[(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
|
||||
[(set (match_operand:VF1_128_256 0 "register_operand")
|
||||
(unspec:VF1_128_256
|
||||
[(match_operand:VF1_128_256 1 "vector_operand")]
|
||||
UNSPEC_RSQRT))]
|
||||
"TARGET_SSE && TARGET_SSE_MATH"
|
||||
{
|
||||
|
@ -24869,271 +24850,6 @@
|
|||
(set_attr "btver2_decode" "vector,vector,vector,vector")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Packed float variants
|
||||
(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
|
||||
[(V8DI "V8SF") (V16SI "V16SF")])
|
||||
|
||||
(define_expand "avx512pf_gatherpf<mode>sf"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(mem:<GATHER_SCATTER_SF_MEM_MODE>
|
||||
(match_par_dup 5
|
||||
[(match_operand 2 "vsib_address_operand")
|
||||
(match_operand:VI48_512 1 "register_operand")
|
||||
(match_operand:SI 3 "const1248_operand")]))
|
||||
(match_operand:SI 4 "const_2_to_3_operand")]
|
||||
UNSPEC_GATHER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
operands[5]
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
|
||||
operands[3]), UNSPEC_VSIBADDR);
|
||||
})
|
||||
|
||||
(define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
|
||||
(match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
|
||||
[(unspec:P
|
||||
[(match_operand:P 2 "vsib_address_operand" "Tv")
|
||||
(match_operand:VI48_512 1 "register_operand" "v")
|
||||
(match_operand:SI 3 "const1248_operand")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(match_operand:SI 4 "const_2_to_3_operand")]
|
||||
UNSPEC_GATHER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
switch (INTVAL (operands[4]))
|
||||
{
|
||||
case 3:
|
||||
/* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
gas changed what it requires incompatibly. */
|
||||
return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "XI")])
|
||||
|
||||
;; Packed double variants
|
||||
(define_expand "avx512pf_gatherpf<mode>df"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(mem:V8DF
|
||||
(match_par_dup 5
|
||||
[(match_operand 2 "vsib_address_operand")
|
||||
(match_operand:VI4_256_8_512 1 "register_operand")
|
||||
(match_operand:SI 3 "const1248_operand")]))
|
||||
(match_operand:SI 4 "const_2_to_3_operand")]
|
||||
UNSPEC_GATHER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
operands[5]
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
|
||||
operands[3]), UNSPEC_VSIBADDR);
|
||||
})
|
||||
|
||||
(define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
|
||||
(match_operator:V8DF 5 "vsib_mem_operator"
|
||||
[(unspec:P
|
||||
[(match_operand:P 2 "vsib_address_operand" "Tv")
|
||||
(match_operand:VI4_256_8_512 1 "register_operand" "v")
|
||||
(match_operand:SI 3 "const1248_operand")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(match_operand:SI 4 "const_2_to_3_operand")]
|
||||
UNSPEC_GATHER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
switch (INTVAL (operands[4]))
|
||||
{
|
||||
case 3:
|
||||
/* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
gas changed what it requires incompatibly. */
|
||||
return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "XI")])
|
||||
|
||||
;; Packed float variants
|
||||
(define_expand "avx512pf_scatterpf<mode>sf"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(mem:<GATHER_SCATTER_SF_MEM_MODE>
|
||||
(match_par_dup 5
|
||||
[(match_operand 2 "vsib_address_operand")
|
||||
(match_operand:VI48_512 1 "register_operand")
|
||||
(match_operand:SI 3 "const1248_operand")]))
|
||||
(match_operand:SI 4 "const2367_operand")]
|
||||
UNSPEC_SCATTER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
operands[5]
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
|
||||
operands[3]), UNSPEC_VSIBADDR);
|
||||
})
|
||||
|
||||
(define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
|
||||
(match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
|
||||
[(unspec:P
|
||||
[(match_operand:P 2 "vsib_address_operand" "Tv")
|
||||
(match_operand:VI48_512 1 "register_operand" "v")
|
||||
(match_operand:SI 3 "const1248_operand")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(match_operand:SI 4 "const2367_operand")]
|
||||
UNSPEC_SCATTER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
switch (INTVAL (operands[4]))
|
||||
{
|
||||
case 3:
|
||||
case 7:
|
||||
/* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
gas changed what it requires incompatibly. */
|
||||
return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
case 6:
|
||||
return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "XI")])
|
||||
|
||||
;; Packed double variants
|
||||
(define_expand "avx512pf_scatterpf<mode>df"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(mem:V8DF
|
||||
(match_par_dup 5
|
||||
[(match_operand 2 "vsib_address_operand")
|
||||
(match_operand:VI4_256_8_512 1 "register_operand")
|
||||
(match_operand:SI 3 "const1248_operand")]))
|
||||
(match_operand:SI 4 "const2367_operand")]
|
||||
UNSPEC_SCATTER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
operands[5]
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
|
||||
operands[3]), UNSPEC_VSIBADDR);
|
||||
})
|
||||
|
||||
(define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
|
||||
[(unspec
|
||||
[(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
|
||||
(match_operator:V8DF 5 "vsib_mem_operator"
|
||||
[(unspec:P
|
||||
[(match_operand:P 2 "vsib_address_operand" "Tv")
|
||||
(match_operand:VI4_256_8_512 1 "register_operand" "v")
|
||||
(match_operand:SI 3 "const1248_operand")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(match_operand:SI 4 "const2367_operand")]
|
||||
UNSPEC_SCATTER_PREFETCH)]
|
||||
"TARGET_AVX512PF"
|
||||
{
|
||||
switch (INTVAL (operands[4]))
|
||||
{
|
||||
case 3:
|
||||
case 7:
|
||||
/* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
gas changed what it requires incompatibly. */
|
||||
return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
case 6:
|
||||
return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "XI")])
|
||||
|
||||
(define_expand "exp2<mode>2"
|
||||
[(set (match_operand:VF_512 0 "register_operand")
|
||||
(unspec:VF_512
|
||||
[(match_operand:VF_512 1 "vector_operand")]
|
||||
UNSPEC_EXP2))]
|
||||
"TARGET_AVX512ER")
|
||||
|
||||
(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
|
||||
[(set (match_operand:VF_512 0 "register_operand" "=v")
|
||||
(unspec:VF_512
|
||||
[(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
|
||||
UNSPEC_EXP2))]
|
||||
"TARGET_AVX512ER"
|
||||
"vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
|
||||
[(set_attr "prefix" "evex")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
|
||||
[(set (match_operand:VF_512 0 "register_operand" "=v")
|
||||
(unspec:VF_512
|
||||
[(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
|
||||
UNSPEC_RCP28))]
|
||||
"TARGET_AVX512ER"
|
||||
"vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
|
||||
[(set_attr "prefix" "evex")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=v")
|
||||
(vec_merge:VF_128
|
||||
(unspec:VF_128
|
||||
[(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
|
||||
UNSPEC_RCP28)
|
||||
(match_operand:VF_128 2 "register_operand" "v")
|
||||
(const_int 1)))]
|
||||
"TARGET_AVX512ER"
|
||||
"vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
|
||||
[(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
|
||||
[(set (match_operand:VF_512 0 "register_operand" "=v")
|
||||
(unspec:VF_512
|
||||
[(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
|
||||
UNSPEC_RSQRT28))]
|
||||
"TARGET_AVX512ER"
|
||||
"vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
|
||||
[(set_attr "prefix" "evex")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=v")
|
||||
(vec_merge:VF_128
|
||||
(unspec:VF_128
|
||||
[(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
|
||||
UNSPEC_RSQRT28)
|
||||
(match_operand:VF_128 2 "register_operand" "v")
|
||||
(const_int 1)))]
|
||||
"TARGET_AVX512ER"
|
||||
"vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
|
||||
[(set_attr "length_immediate" "1")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; XOP instructions
|
||||
|
@ -29419,291 +29135,6 @@
|
|||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_mode_iterator IMOD4
|
||||
[(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
|
||||
|
||||
(define_mode_attr imod4_narrow
|
||||
[(V64SF "V16SF") (V64SI "V16SI")])
|
||||
|
||||
(define_expand "mov<mode>"
|
||||
[(set (match_operand:IMOD4 0 "nonimmediate_operand")
|
||||
(match_operand:IMOD4 1 "nonimm_or_0_operand"))]
|
||||
"TARGET_AVX512F"
|
||||
{
|
||||
ix86_expand_vector_move (<MODE>mode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*mov<mode>_internal"
|
||||
[(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
|
||||
(match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
|
||||
"TARGET_AVX512F
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| register_operand (operands[1], <MODE>mode))"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx op0, op1;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
op0 = simplify_subreg
|
||||
(<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
|
||||
op1 = simplify_subreg
|
||||
(<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
|
||||
emit_move_insn (op0, op1);
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "avx5124fmaddps_4fmaddps"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
(unspec:V16SF
|
||||
[(match_operand:V16SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("V16SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fmaddps_mask"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
(vec_merge:V16SF
|
||||
(unspec:V16SF
|
||||
[(match_operand:V64SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
|
||||
(match_operand:V16SF 3 "register_operand" "0")
|
||||
(match_operand:HI 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("V16SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fmaddps_maskz"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
(vec_merge:V16SF
|
||||
(unspec:V16SF
|
||||
[(match_operand:V16SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
|
||||
(match_operand:V16SF 4 "const0_operand")
|
||||
(match_operand:HI 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("V16SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fmaddss"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fmaddss_mask"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(vec_merge:V4SF
|
||||
(unspec:V4SF
|
||||
[(match_operand:V64SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
|
||||
(match_operand:V4SF 3 "register_operand" "0")
|
||||
(match_operand:QI 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fmaddss_maskz"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(vec_merge:V4SF
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
|
||||
(match_operand:V4SF 4 "const0_operand")
|
||||
(match_operand:QI 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fnmaddps"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
(unspec:V16SF
|
||||
[(match_operand:V16SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("V16SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fnmaddps_mask"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
(vec_merge:V16SF
|
||||
(unspec:V16SF
|
||||
[(match_operand:V64SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
|
||||
(match_operand:V16SF 3 "register_operand" "0")
|
||||
(match_operand:HI 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("V16SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fnmaddps_maskz"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
(vec_merge:V16SF
|
||||
(unspec:V16SF
|
||||
[(match_operand:V16SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
|
||||
(match_operand:V16SF 4 "const0_operand")
|
||||
(match_operand:HI 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("V16SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fnmaddss"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fnmaddss_mask"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(vec_merge:V4SF
|
||||
(unspec:V4SF
|
||||
[(match_operand:V64SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
|
||||
(match_operand:V4SF 3 "register_operand" "0")
|
||||
(match_operand:QI 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("SF"))])
|
||||
|
||||
(define_insn "avx5124fmaddps_4fnmaddss_maskz"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(vec_merge:V4SF
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V64SF 2 "register_operand" "v")
|
||||
(match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
|
||||
(match_operand:V4SF 4 "const0_operand")
|
||||
(match_operand:QI 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124FMAPS"
|
||||
"v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("SF"))])
|
||||
|
||||
(define_insn "avx5124vnniw_vp4dpwssd"
|
||||
[(set (match_operand:V16SI 0 "register_operand" "=v")
|
||||
(unspec:V16SI
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V64SI 2 "register_operand" "v")
|
||||
(match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
|
||||
"TARGET_AVX5124VNNIW"
|
||||
"vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("TI"))])
|
||||
|
||||
(define_insn "avx5124vnniw_vp4dpwssd_mask"
|
||||
[(set (match_operand:V16SI 0 "register_operand" "=v")
|
||||
(vec_merge:V16SI
|
||||
(unspec:V16SI
|
||||
[(match_operand:V64SI 1 "register_operand" "v")
|
||||
(match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
|
||||
(match_operand:V16SI 3 "register_operand" "0")
|
||||
(match_operand:HI 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124VNNIW"
|
||||
"vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("TI"))])
|
||||
|
||||
(define_insn "avx5124vnniw_vp4dpwssd_maskz"
|
||||
[(set (match_operand:V16SI 0 "register_operand" "=v")
|
||||
(vec_merge:V16SI
|
||||
(unspec:V16SI
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V64SI 2 "register_operand" "v")
|
||||
(match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
|
||||
(match_operand:V16SI 4 "const0_operand")
|
||||
(match_operand:HI 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124VNNIW"
|
||||
"vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("TI"))])
|
||||
|
||||
(define_insn "avx5124vnniw_vp4dpwssds"
|
||||
[(set (match_operand:V16SI 0 "register_operand" "=v")
|
||||
(unspec:V16SI
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V64SI 2 "register_operand" "v")
|
||||
(match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
|
||||
"TARGET_AVX5124VNNIW"
|
||||
"vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("TI"))])
|
||||
|
||||
(define_insn "avx5124vnniw_vp4dpwssds_mask"
|
||||
[(set (match_operand:V16SI 0 "register_operand" "=v")
|
||||
(vec_merge:V16SI
|
||||
(unspec:V16SI
|
||||
[(match_operand:V64SI 1 "register_operand" "v")
|
||||
(match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
|
||||
(match_operand:V16SI 3 "register_operand" "0")
|
||||
(match_operand:HI 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124VNNIW"
|
||||
"vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("TI"))])
|
||||
|
||||
(define_insn "avx5124vnniw_vp4dpwssds_maskz"
|
||||
[(set (match_operand:V16SI 0 "register_operand" "=v")
|
||||
(vec_merge:V16SI
|
||||
(unspec:V16SI
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V64SI 2 "register_operand" "v")
|
||||
(match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
|
||||
(match_operand:V16SI 4 "const0_operand")
|
||||
(match_operand:HI 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX5124VNNIW"
|
||||
"vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
|
||||
[(set_attr ("type") ("ssemuladd"))
|
||||
(set_attr ("prefix") ("evex"))
|
||||
(set_attr ("mode") ("TI"))])
|
||||
|
||||
(define_expand "popcount<mode>2"
|
||||
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
|
||||
(popcount:VI48_AVX512VL
|
||||
|
|
|
@ -44,8 +44,6 @@ ix86_issue_rate (void)
|
|||
case PROCESSOR_LAKEMONT:
|
||||
case PROCESSOR_BONNELL:
|
||||
case PROCESSOR_SILVERMONT:
|
||||
case PROCESSOR_KNL:
|
||||
case PROCESSOR_KNM:
|
||||
case PROCESSOR_INTEL:
|
||||
case PROCESSOR_K6:
|
||||
case PROCESSOR_BTVER2:
|
||||
|
@ -500,8 +498,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
|
|||
break;
|
||||
|
||||
case PROCESSOR_SILVERMONT:
|
||||
case PROCESSOR_KNL:
|
||||
case PROCESSOR_KNM:
|
||||
case PROCESSOR_INTEL:
|
||||
if (!reload_completed)
|
||||
return cost;
|
||||
|
|
|
@ -41,9 +41,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|||
/* X86_TUNE_SCHEDULE: Enable scheduling. */
|
||||
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
|
||||
m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
| m_INTEL | m_K6_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC)
|
||||
|
||||
/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
|
||||
on modern chips. Prefer stores affecting whole integer register
|
||||
|
@ -52,8 +52,8 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
|
|||
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
|
||||
m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2
|
||||
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
|
||||
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT
|
||||
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
| m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
|
||||
destinations to be 128bit to allow register renaming on 128bit SSE units,
|
||||
|
@ -112,7 +112,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
|
|||
partial dependencies. */
|
||||
DEF_TUNE (X86_TUNE_MOVX, "movx",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
|
||||
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
|
||||
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_INTEL
|
||||
| m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_CORE_AVX2 | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
|
@ -120,8 +120,8 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
|
|||
full sized loads. */
|
||||
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
|
||||
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
|
||||
| m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE
|
||||
| m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
|
||||
conditional jump instruction for 32 bit TARGET. */
|
||||
|
@ -161,7 +161,7 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
|
|||
to happen in parallel. */
|
||||
|
||||
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
|
||||
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
|
||||
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8 | m_ZHAOXIN)
|
||||
|
||||
/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
|
||||
|
@ -224,8 +224,8 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
|
|||
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
|
||||
than 4 branch instructions in the 16 byte window. */
|
||||
DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
|
||||
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
|
||||
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Integer instruction selection tuning */
|
||||
|
@ -253,7 +253,7 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
|
|||
done by GCC generated code. */
|
||||
DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
|
||||
~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
|
||||
| m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT
|
||||
| m_BONNELL | m_SILVERMONT | m_INTEL | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_ZHAOXIN | m_GENERIC))
|
||||
|
||||
|
@ -261,19 +261,18 @@ DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
|
|||
for DFmode copies */
|
||||
DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
|
||||
~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC))
|
||||
| m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC))
|
||||
|
||||
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
||||
will impact LEA instruction selection. */
|
||||
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
|
||||
| m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_ZHAOXIN)
|
||||
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_INTEL | m_ZHAOXIN)
|
||||
|
||||
/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
|
||||
m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_KNL | m_KNM)
|
||||
m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS)
|
||||
|
||||
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
|
||||
vector path on AMD machines.
|
||||
|
@ -290,8 +289,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
|
|||
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
|
||||
a conditional move. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
|
||||
m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_KNL
|
||||
| m_KNM | m_INTEL)
|
||||
m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL)
|
||||
|
||||
/* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
|
||||
as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
|
||||
|
@ -316,22 +314,21 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
|
|||
|
||||
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
|
||||
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
|
||||
| m_BTVER | m_ZNVER | m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER
|
||||
| m_ZNVER | m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
|
||||
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */
|
||||
DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
|
||||
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
|
||||
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_INTEL
|
||||
| m_K6 | m_GOLDMONT | m_GOLDMONT_PLUS))
|
||||
|
||||
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
|
||||
DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
|
||||
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
|
||||
| m_LAKEMONT | m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC)
|
||||
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | m_LAKEMONT
|
||||
| m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
|
||||
for bit-manipulation instructions. */
|
||||
|
@ -349,9 +346,8 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
|
|||
/* X86_TUNE_ONE_IF_CONV_INSN: Restrict the number of cmov insns in
|
||||
if-converted sequence to one. */
|
||||
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
|
||||
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_ZHAOXIN | m_GENERIC)
|
||||
m_SILVERMONT | m_INTEL | m_CORE_ALL | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
|
||||
|
@ -362,8 +358,7 @@ DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
|
|||
generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -
|
||||
(signed) x >> (W-1)) instead of cmove or SSE max/abs instructions. */
|
||||
DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs",
|
||||
m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_ZHAOXIN)
|
||||
m_CORE_ALL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_ZHAOXIN)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* 387 instruction selection tuning */
|
||||
|
@ -379,9 +374,9 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
|
|||
integer operand. */
|
||||
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
|
||||
~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
|
||||
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
|
||||
| m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
|
||||
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC))
|
||||
| m_SILVERMONT | m_INTEL | m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC))
|
||||
|
||||
/* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */
|
||||
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN)
|
||||
|
@ -389,9 +384,9 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN)
|
|||
/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
|
||||
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_ZHAOXIN
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
| m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_ZHAOXIN | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* SSE instruction selection tuning */
|
||||
|
@ -405,15 +400,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
|
|||
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead
|
||||
of a sequence loading registers by parts. */
|
||||
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
|
||||
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_INTEL
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN
|
||||
| m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
|
||||
instead of a sequence loading registers by parts. */
|
||||
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT
|
||||
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
|
@ -457,8 +452,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
|
|||
/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
|
||||
fp converts to destination register. */
|
||||
DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
|
||||
m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_INTEL)
|
||||
m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL)
|
||||
|
||||
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
|
||||
from FP to FP. This form of instructions avoids partial write to the
|
||||
|
@ -472,8 +466,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
|
|||
|
||||
/* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */
|
||||
DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
|
||||
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_INTEL)
|
||||
m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL)
|
||||
|
||||
/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
|
||||
|
@ -708,10 +701,6 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)
|
|||
is usually used for RISC targets. */
|
||||
DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", m_NONE)
|
||||
|
||||
/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion
|
||||
before a transfer of control flow out of the function. */
|
||||
DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
|
||||
|
||||
/* X86_TUNE_SLOW_STC: This disables use of stc, clc and cmc carry flag
|
||||
modifications on architectures where these operations are slow. */
|
||||
DEF_TUNE (X86_TUNE_SLOW_STC, "slow_stc", m_PENT4)
|
||||
|
|
|
@ -40,7 +40,6 @@ enum _mm_hint
|
|||
_MM_HINT_IT1 = 18,
|
||||
/* _MM_HINT_ET is _MM_HINT_T with the 3rd bit set. */
|
||||
_MM_HINT_ET0 = 7,
|
||||
_MM_HINT_ET1 = 6,
|
||||
_MM_HINT_T0 = 3,
|
||||
_MM_HINT_T1 = 2,
|
||||
_MM_HINT_T2 = 1,
|
||||
|
|
|
@ -6945,16 +6945,6 @@ Enable/disable the generation of the AVX instructions.
|
|||
@itemx no-avx2
|
||||
Enable/disable the generation of the AVX2 instructions.
|
||||
|
||||
@cindex @code{target("avx5124fmaps")} function attribute, x86
|
||||
@item avx5124fmaps
|
||||
@itemx no-avx5124fmaps
|
||||
Enable/disable the generation of the AVX5124FMAPS instructions.
|
||||
|
||||
@cindex @code{target("avx5124vnniw")} function attribute, x86
|
||||
@item avx5124vnniw
|
||||
@itemx no-avx5124vnniw
|
||||
Enable/disable the generation of the AVX5124VNNIW instructions.
|
||||
|
||||
@cindex @code{target("avx512bitalg")} function attribute, x86
|
||||
@item avx512bitalg
|
||||
@itemx no-avx512bitalg
|
||||
|
@ -6990,11 +6980,6 @@ Enable/disable the generation of the AVX512F instructions.
|
|||
@itemx no-avx512ifma
|
||||
Enable/disable the generation of the AVX512IFMA instructions.
|
||||
|
||||
@cindex @code{target("avx512pf")} function attribute, x86
|
||||
@item avx512pf
|
||||
@itemx no-avx512pf
|
||||
Enable/disable the generation of the AVX512PF instructions.
|
||||
|
||||
@cindex @code{target("avx512vbmi")} function attribute, x86
|
||||
@item avx512vbmi
|
||||
@itemx no-avx512vbmi
|
||||
|
@ -7160,11 +7145,6 @@ Enable/disable the generation of the PKU instructions.
|
|||
@itemx no-popcnt
|
||||
Enable/disable the generation of the POPCNT instruction.
|
||||
|
||||
@cindex @code{target("prefetchwt1")} function attribute, x86
|
||||
@item prefetchwt1
|
||||
@itemx no-prefetchwt1
|
||||
Enable/disable the generation of the PREFETCHWT1 instructions.
|
||||
|
||||
@cindex @code{target("prfchw")} function attribute, x86
|
||||
@item prfchw
|
||||
@itemx no-prfchw
|
||||
|
@ -26167,12 +26147,6 @@ Intel Atom Grand Ridge CPU.
|
|||
@item clearwaterforest
|
||||
Intel Atom Clearwater Forest CPU.
|
||||
|
||||
@item knl
|
||||
Intel Knights Landing CPU.
|
||||
|
||||
@item knm
|
||||
Intel Knights Mill CPU.
|
||||
|
||||
@item lujiazui
|
||||
ZHAOXIN lujiazui CPU.
|
||||
|
||||
|
@ -26301,18 +26275,10 @@ AVX512BW instructions.
|
|||
AVX512DQ instructions.
|
||||
@item avx512cd
|
||||
AVX512CD instructions.
|
||||
@item avx512er
|
||||
AVX512ER instructions.
|
||||
@item avx512pf
|
||||
AVX512PF instructions.
|
||||
@item avx512vbmi
|
||||
AVX512VBMI instructions.
|
||||
@item avx512ifma
|
||||
AVX512IFMA instructions.
|
||||
@item avx5124vnniw
|
||||
AVX5124VNNIW instructions.
|
||||
@item avx5124fmaps
|
||||
AVX5124FMAPS instructions.
|
||||
@item avx512vpopcntdq
|
||||
AVX512VPOPCNTDQ instructions.
|
||||
@item avx512vbmi2
|
||||
|
|
|
@ -1459,17 +1459,17 @@ See RS/6000 and PowerPC Options.
|
|||
-mmove-max=@var{bits} -mstore-max=@var{bits}
|
||||
-mnoreturn-no-callee-saved-registers
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx
|
||||
-mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl
|
||||
-mavx2 -mavx512f -mavx512cd -mavx512vl
|
||||
-mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi -msha -maes
|
||||
-mpclmul -mfsgsbase -mrdrnd -mf16c -mfma -mpconfig -mwbnoinvd
|
||||
-mptwrite -mprefetchwt1 -mclflushopt -mclwb -mxsavec -mxsaves
|
||||
-mptwrite -mclflushopt -mclwb -mxsavec -mxsaves
|
||||
-msse4a -m3dnow -m3dnowa -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop
|
||||
-madx -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mhle -mlwp
|
||||
-mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes -mwaitpkg
|
||||
-mshstk -mmanual-endbr -mcet-switch -mforce-indirect-call
|
||||
-mavx512vbmi2 -mavx512bf16 -menqcmd
|
||||
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid
|
||||
-mavx512vnni -mprfchw -mrdpid
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni
|
||||
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
|
||||
|
@ -34475,19 +34475,6 @@ LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
|
|||
ENQCMD, UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16,
|
||||
SHA512, SM3, SM4, USER_MSR and PREFETCHI instruction set support.
|
||||
|
||||
@item knl
|
||||
Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
|
||||
RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support.
|
||||
|
||||
@item knm
|
||||
Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
|
||||
RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW,
|
||||
AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support.
|
||||
|
||||
@item skylake-avx512
|
||||
Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
|
||||
|
@ -35249,12 +35236,6 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@opindex mavx512f
|
||||
@itemx -mavx512f
|
||||
@need 200
|
||||
@opindex mavx512pf
|
||||
@itemx -mavx512pf
|
||||
@need 200
|
||||
@opindex mavx512er
|
||||
@itemx -mavx512er
|
||||
@need 200
|
||||
@opindex mavx512cd
|
||||
@itemx -mavx512cd
|
||||
@need 200
|
||||
|
@ -35318,9 +35299,6 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@opindex mrdpid
|
||||
@itemx -mrdpid
|
||||
@need 200
|
||||
@opindex mprefetchwt1
|
||||
@itemx -mprefetchwt1
|
||||
@need 200
|
||||
@opindex mrdseed
|
||||
@itemx -mrdseed
|
||||
@need 200
|
||||
|
@ -35434,18 +35412,12 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@opindex mavx512vp2intersect
|
||||
@itemx -mavx512vp2intersect
|
||||
@need 200
|
||||
@opindex mavx5124fmaps
|
||||
@itemx -mavx5124fmaps
|
||||
@need 200
|
||||
@opindex mavx512vnni
|
||||
@itemx -mavx512vnni
|
||||
@need 200
|
||||
@opindex mavxvnni
|
||||
@itemx -mavxvnni
|
||||
@need 200
|
||||
@opindex mavx5124vnniw
|
||||
@itemx -mavx5124vnniw
|
||||
@need 200
|
||||
@opindex mcldemote
|
||||
@itemx -mcldemote
|
||||
@need 200
|
||||
|
@ -35520,18 +35492,17 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@opindex mavx10.1-512
|
||||
@itemx -mavx10.1-512
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
|
||||
WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
|
||||
3DNow!@:, enhanced 3DNow!@:, POPCNT, ABM, ADX, BMI, BMI2, LZCNT, FXSR, XSAVE,
|
||||
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
|
||||
AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT,
|
||||
AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4, APX_F, USER_MSR, AVX10.1 or
|
||||
CLDEMOTE extended instruction sets. Each has a corresponding @option{-mno-}
|
||||
option to disable use of these instructions.
|
||||
AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES,
|
||||
PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
|
||||
WBNOINVD, FMA4, PREFETCHW, RDPID, RDSEED, SGX, XOP, LWP, 3DNow!@:,
|
||||
enhanced 3DNow!@:, POPCNT, ABM, ADX, BMI, BMI2, LZCNT, FXSR, XSAVE, XSAVEOPT,
|
||||
XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES,
|
||||
WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD,
|
||||
AVX512VPOPCNTDQ, AVX512VNNI, SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8,
|
||||
AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT,
|
||||
CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512,
|
||||
SM4, APX_F, USER_MSR, AVX10.1 or CLDEMOTE extended instruction sets. Each has
|
||||
a corresponding @option{-mno-} option to disable use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{x86 Built-in Functions}, for details of the functions enabled and
|
||||
|
|
|
@ -1,10 +1,5 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512cd -msha -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
|
|
@ -1,10 +1,5 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512cd -msha -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
|
|
@ -5,12 +5,8 @@ void __attribute__ ((target("avx512vl"))) foo () {} /* { dg-error "ISA '\[^\n\r\
|
|||
void __attribute__ ((target("avx512bw"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512dq"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512cd"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512er"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512pf"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512vbmi"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512ifma"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx5124vnniw"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx5124fmaps"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512vpopcntdq"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("avx512vbmi2"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
void __attribute__ ((target("gfni"))) foo () {} /* { dg-error "ISA '\[^\n\r\]*' is not supported in 'target' attribute, use 'arch=' syntax" } */
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx5124fmaps" } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m512 a, b, c, d, e, f, g, x1, x2, x3;
|
||||
__m128 *mem;
|
||||
__mmask16 m;
|
||||
|
||||
int foo ()
|
||||
{
|
||||
x1 = _mm512_4fmadd_ps (a, b, c, d, e, mem);
|
||||
x2 = _mm512_mask_4fmadd_ps (a, m, b, c, d, e, mem);
|
||||
x3 = _mm512_maskz_4fmadd_ps (m, a, b, c, d, e, mem);
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx5124fmaps" } */
|
||||
/* { dg-require-effective-target avx5124fmaps } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#define ESP_FLOAT 1.0
|
||||
|
||||
#define AVX5124FMAPS
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
#define SIZE (AVX512F_LEN / 32)
|
||||
|
||||
#include "avx512f-mask-type.h"
|
||||
|
||||
void
|
||||
CALC (float *src1, float* src2, float *src3,
|
||||
float *src4, float* prev_dst, float *mult, float *dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
{
|
||||
dst[i] = (double)prev_dst[i]
|
||||
+ (double)src1[i] * (double)mult[0]
|
||||
+ (double)src2[i] * (double)mult[1]
|
||||
+ (double)src3[i] * (double)mult[2]
|
||||
+ (double)src4[i] * (double)mult[3];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i, sign;
|
||||
UNION_TYPE (AVX512F_LEN,) src1, src2, src3, src4, src5, dst, res1, res2, res3;
|
||||
UNION_TYPE (128,) mult;
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
float res_ref[SIZE];
|
||||
|
||||
sign = -1;
|
||||
for (i = 0; i < SIZE; i++)
|
||||
{
|
||||
src1.a[i] = 1.5 + 34.67 * i * sign;
|
||||
src2.a[i] = -22.17 * i * sign;
|
||||
src3.a[i] = src1.a[i] * src1.a[i];
|
||||
src4.a[i] = src2.a[i] * src2.a[i];
|
||||
sign = sign * -1;
|
||||
}
|
||||
for (i = 0; i < 4; i++)
|
||||
mult.a[i] = 3.1415 + i * 2.71828;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
src5.a[i] = DEFAULT_VALUE;
|
||||
|
||||
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
|
||||
|
||||
res1.x = INTRINSIC (_4fmadd_ps) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res2.x = INTRINSIC (_mask_4fmadd_ps) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res3.x = INTRINSIC (_maskz_4fmadd_ps) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
|
||||
if (UNION_FP_CHECK (AVX512F_LEN,) (res1, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE () (res_ref, mask, SIZE);
|
||||
if (UNION_FP_CHECK (AVX512F_LEN,) (res2, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO () (res_ref, mask, SIZE);
|
||||
if (UNION_FP_CHECK (AVX512F_LEN,) (res3, res_ref))
|
||||
abort ();
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx5124fmaps" } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m128 a, b, c, d, e, f, x1, x2, x3;
|
||||
__m128 *mem;
|
||||
__mmask8 m;
|
||||
|
||||
int foo ()
|
||||
{
|
||||
x1 = _mm_4fmadd_ss (a, b, c, d, e, mem);
|
||||
x2 = _mm_mask_4fmadd_ss (a, m, b, c, d, e, mem);
|
||||
x3 = _mm_maskz_4fmadd_ss (m, a, b, c, d, e, mem);
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx5124fmaps" } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m512 a, b, c, d, e, f, g, x1, x2, x3;
|
||||
__m128 *mem;
|
||||
__mmask16 m;
|
||||
|
||||
int foo ()
|
||||
{
|
||||
x1 = _mm512_4fnmadd_ps (a, b, c, d, e, mem);
|
||||
x2 = _mm512_mask_4fnmadd_ps (a, m, b, c, d, e, mem);
|
||||
x3 = _mm512_maskz_4fnmadd_ps (m, a, b, c, d, e, mem);
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx5124fmaps" } */
|
||||
/* { dg-require-effective-target avx5124fmaps } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#define ESP_FLOAT 1.0
|
||||
|
||||
#define AVX5124FMAPS
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
#define SIZE (AVX512F_LEN / 32)
|
||||
|
||||
#include "avx512f-mask-type.h"
|
||||
|
||||
void
|
||||
CALC (float *src1, float* src2, float *src3,
|
||||
float *src4, float* prev_dst, float *mult, float *dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
{
|
||||
dst[i] = (double)prev_dst[i]
|
||||
- (double)src1[i] * (double)mult[0]
|
||||
- (double)src2[i] * (double)mult[1]
|
||||
- (double)src3[i] * (double)mult[2]
|
||||
- (double)src4[i] * (double)mult[3];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i, sign;
|
||||
UNION_TYPE (AVX512F_LEN,) src1, src2, src3, src4, src5, dst, res1, res2, res3;
|
||||
UNION_TYPE (128,) mult;
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
float res_ref[SIZE];
|
||||
|
||||
sign = -1;
|
||||
for (i = 0; i < SIZE; i++)
|
||||
{
|
||||
src1.a[i] = 1.5 + 34.67 * i * sign;
|
||||
src2.a[i] = -22.17 * i * sign;
|
||||
src3.a[i] = src1.a[i] * src1.a[i];
|
||||
src4.a[i] = src2.a[i] * src2.a[i];
|
||||
sign = sign * -1;
|
||||
}
|
||||
for (i = 0; i < 4; i++)
|
||||
mult.a[i] = 3.1415 + i * 2.71828;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
src5.a[i] = DEFAULT_VALUE;
|
||||
|
||||
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
|
||||
|
||||
res1.x = INTRINSIC (_4fnmadd_ps) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res2.x = INTRINSIC (_mask_4fnmadd_ps) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res3.x = INTRINSIC (_maskz_4fnmadd_ps) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
|
||||
if (UNION_FP_CHECK (AVX512F_LEN,) (res1, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE () (res_ref, mask, SIZE);
|
||||
if (UNION_FP_CHECK (AVX512F_LEN,) (res2, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO () (res_ref, mask, SIZE);
|
||||
if (UNION_FP_CHECK (AVX512F_LEN,) (res3, res_ref))
|
||||
abort ();
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx5124fmaps" } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m128 a, b, c, d, e, f, x1, x2, x3;
|
||||
__m128 *mem;
|
||||
__mmask8 m;
|
||||
|
||||
int foo ()
|
||||
{
|
||||
x1 = _mm_4fnmadd_ss (a, b, c, d, e, mem);
|
||||
x2 = _mm_mask_4fnmadd_ss (a, m, b, c, d, e, mem);
|
||||
x3 = _mm_maskz_4fnmadd_ss (m, a, b, c, d, e, mem);
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx5124vnniw" } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m512i a, b, c, d, e, f, g, x1, x2, x3;
|
||||
__m128i *mem;
|
||||
__mmask16 m;
|
||||
|
||||
int foo ()
|
||||
{
|
||||
x1 = _mm512_4dpwssd_epi32 (a, b, c, d, e, mem);
|
||||
x2 = _mm512_mask_4dpwssd_epi32 (a, m, b, c, d, e, mem);
|
||||
x3 = _mm512_maskz_4dpwssd_epi32 (m, a, b, c, d, e, mem);
|
||||
}
|
|
@ -1,80 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx5124vnniw" } */
|
||||
/* { dg-require-effective-target avx5124vnniw } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#define AVX5124VNNIW
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
#define SIZE (AVX512F_LEN / 32)
|
||||
|
||||
#include "avx512f-mask-type.h"
|
||||
|
||||
void
|
||||
CALC (short *src1, short* src2, short *src3,
|
||||
short *src4, int* prev_dst, short *mult, int *dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
{
|
||||
int p1dword, p2dword;
|
||||
dst[i] = prev_dst[i];
|
||||
p1dword = (int)(src1[2*i ]) * (int)(mult[0]);
|
||||
p2dword = (int)(src1[2*i+1]) * (int)(mult[1]);
|
||||
dst[i] += p1dword + p2dword;
|
||||
|
||||
p1dword = (int)(src2[2*i ]) * (int)(mult[2]);
|
||||
p2dword = (int)(src2[2*i+1]) * (int)(mult[3]);
|
||||
dst[i] += p1dword + p2dword;
|
||||
|
||||
p1dword = (int)(src3[2*i ]) * (int)(mult[4]);
|
||||
p2dword = (int)(src3[2*i+1]) * (int)(mult[5]);
|
||||
dst[i] += p1dword + p2dword;
|
||||
|
||||
p1dword = (int)(src4[2*i ]) * (int)(mult[6]);
|
||||
p2dword = (int)(src4[2*i+1]) * (int)(mult[7]);
|
||||
dst[i] += p1dword + p2dword;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
|
||||
UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
|
||||
UNION_TYPE (128, i_w) mult;
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
int res_ref[SIZE];
|
||||
|
||||
for (i = 0; i < SIZE * 2; i++)
|
||||
{
|
||||
src1.a[i] = 2 + 7 * i % 291;
|
||||
src2.a[i] = 3 + 11 * (i % 377) * i;
|
||||
src3.a[i] = src1.a[i] * src1.a[i];
|
||||
src4.a[i] = src2.a[i] * src2.a[i];
|
||||
}
|
||||
for (i = 0; i < 8; i++)
|
||||
mult.a[i] = 3 + i * 2;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
src5.a[i] = DEFAULT_VALUE;
|
||||
|
||||
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
|
||||
|
||||
res1.x = INTRINSIC (_4dpwssd_epi32) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res2.x = INTRINSIC (_mask_4dpwssd_epi32) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res3.x = INTRINSIC (_maskz_4dpwssd_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
|
||||
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (i_d) (res_ref, mask, SIZE);
|
||||
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (i_d) (res_ref, mask, SIZE);
|
||||
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
|
||||
abort ();
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx5124vnniw" } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m512i a, b, c, d, e, f, g, x1, x2, x3;
|
||||
__m128i *mem;
|
||||
__mmask16 m;
|
||||
|
||||
int foo ()
|
||||
{
|
||||
x1 = _mm512_4dpwssds_epi32 (a, b, c, d, e, mem);
|
||||
x2 = _mm512_mask_4dpwssds_epi32 (a, m, b, c, d, e, mem);
|
||||
x3 = _mm512_maskz_4dpwssds_epi32 (m, a, b, c, d, e, mem);
|
||||
}
|
|
@ -1,99 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx5124vnniw" } */
|
||||
/* { dg-require-effective-target avx5124vnniw } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#define DEFAULT_VALUE 0x7ffffffe
|
||||
|
||||
#define AVX5124VNNIW
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
#define SIZE (AVX512F_LEN / 32)
|
||||
|
||||
#include "avx512f-mask-type.h"
|
||||
|
||||
void
|
||||
CALC (short *src1, short* src2, short *src3,
|
||||
short *src4, int* prev_dst, short *mult, int *dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
{
|
||||
int p1dword, p2dword;
|
||||
long long int tmp;
|
||||
dst[i] = prev_dst[i];
|
||||
p1dword = (int)(src1[2*i ]) * (int)(mult[0]);
|
||||
p2dword = (int)(src1[2*i+1]) * (int)(mult[1]);
|
||||
tmp = (long long)dst[i] + p1dword + p2dword;
|
||||
if (tmp > 0x7fffffff)
|
||||
dst[i] = 0x7fffffff;
|
||||
else
|
||||
dst[i] += p1dword + p2dword;
|
||||
|
||||
p1dword = (int)(src2[2*i ]) * (int)(mult[2]);
|
||||
p2dword = (int)(src2[2*i+1]) * (int)(mult[3]);
|
||||
tmp = (long long)dst[i] + p1dword + p2dword;
|
||||
if (tmp > 0x7fffffff)
|
||||
dst[i] = 0x7fffffff;
|
||||
else
|
||||
dst[i] += p1dword + p2dword;
|
||||
|
||||
p1dword = (int)(src3[2*i ]) * (int)(mult[4]);
|
||||
p2dword = (int)(src3[2*i+1]) * (int)(mult[5]);
|
||||
tmp = (long long)dst[i] + p1dword + p2dword;
|
||||
if (tmp > 0x7fffffff)
|
||||
dst[i] = 0x7fffffff;
|
||||
else
|
||||
dst[i] += p1dword + p2dword;
|
||||
|
||||
p1dword = (int)(src4[2*i ]) * (int)(mult[6]);
|
||||
p2dword = (int)(src4[2*i+1]) * (int)(mult[7]);
|
||||
tmp = (long long)dst[i] + p1dword + p2dword;
|
||||
if (tmp > 0x7fffffff)
|
||||
dst[i] = 0x7fffffff;
|
||||
else
|
||||
dst[i] += p1dword + p2dword;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
|
||||
UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
|
||||
UNION_TYPE (128, i_w) mult;
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
int res_ref[SIZE];
|
||||
|
||||
for (i = 0; i < SIZE * 2; i++)
|
||||
{
|
||||
src1.a[i] = 2 + 7 * i % 291;
|
||||
src2.a[i] = 3 + 11 * (i % 377) * i;
|
||||
src3.a[i] = src1.a[i] * src1.a[i];
|
||||
src4.a[i] = src2.a[i] * src2.a[i];
|
||||
}
|
||||
for (i = 0; i < 8; i++)
|
||||
mult.a[i] = 3 + i * 2;
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
src5.a[i] = DEFAULT_VALUE;
|
||||
|
||||
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
|
||||
|
||||
res1.x = INTRINSIC (_4dpwssds_epi32) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res2.x = INTRINSIC (_mask_4dpwssds_epi32) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
res3.x = INTRINSIC (_maskz_4dpwssds_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
|
||||
|
||||
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (i_d) (res_ref, mask, SIZE);
|
||||
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (i_d) (res_ref, mask, SIZE);
|
||||
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
|
||||
abort ();
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
#define AVX512ER
|
||||
#define avx512er_test test_512
|
||||
#include "avx512f-helper.h"
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512d x;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm512_exp2a23_pd (x);
|
||||
x = _mm512_mask_exp2a23_pd (x, m, x);
|
||||
x = _mm512_maskz_exp2a23_pd (m, x);
|
||||
x = _mm512_exp2a23_round_pd (x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_mask_exp2a23_round_pd (x, m, x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_maskz_exp2a23_round_pd (m, x, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
void static
|
||||
compute_vexp2pd (double *s, double *r)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 8; i++)
|
||||
r[i] = pow (2.0, s[i]);
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union512d src, res1, res2, res3;
|
||||
__mmask8 mask = MASK_VALUE;
|
||||
double res_ref[8];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
src.a[i] = 179.345 - 6.5645 * i;
|
||||
res2.a[i] = DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
res1.x = _mm512_exp2a23_pd (src.x);
|
||||
res2.x = _mm512_mask_exp2a23_pd (res2.x, mask, src.x);
|
||||
res3.x = _mm512_maskz_exp2a23_pd (mask, src.x);
|
||||
|
||||
compute_vexp2pd (src.a, res_ref);
|
||||
|
||||
if (check_rough_union512d (res1, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (d) (res_ref, mask, 8);
|
||||
if (check_rough_union512d (res2, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (d) (res_ref, mask, 8);
|
||||
if (check_rough_union512d (res3, res_ref, 0.0001))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512 x;
|
||||
volatile __mmask16 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm512_exp2a23_ps (x);
|
||||
x = _mm512_mask_exp2a23_ps (x, m, x);
|
||||
x = _mm512_maskz_exp2a23_ps (m, x);
|
||||
x = _mm512_exp2a23_round_ps (x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_mask_exp2a23_round_ps (x, m, x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_maskz_exp2a23_round_ps (m, x, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
void static
|
||||
compute_vexp2ps (float *s, float *r)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
r[i] = pow (2.0, s[i]);
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union512 src, res1, res2, res3;
|
||||
__mmask16 mask = MASK_VALUE;
|
||||
float res_ref[16];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
src.a[i] = 79.345 - 6.5645 * i;
|
||||
res2.a[i] = DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
res1.x = _mm512_exp2a23_ps (src.x);
|
||||
res2.x = _mm512_mask_exp2a23_ps (res2.x, mask, src.x);
|
||||
res3.x = _mm512_maskz_exp2a23_ps (mask, src.x);
|
||||
|
||||
compute_vexp2ps (src.a, res_ref);
|
||||
|
||||
if (check_rough_union512 (res1, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE ()(res_ref, mask, 16);
|
||||
if (check_rough_union512 (res2, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO ()(res_ref, mask, 16);
|
||||
if (check_rough_union512 (res3, res_ref, 0.0001))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512d x;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm512_rcp28_pd (x);
|
||||
x = _mm512_mask_rcp28_pd (x, m, x);
|
||||
x = _mm512_maskz_rcp28_pd (m, x);
|
||||
x = _mm512_rcp28_round_pd (x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_mask_rcp28_round_pd (x, m, x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_maskz_rcp28_round_pd (m, x, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
/* Reference for vrcp28pd: store the reciprocal of S[k] into R[k] for
   the 8 double lanes.  */
static void
compute_vrcp28pd (double *s, double *r)
{
  double *const stop = s + 8;

  while (s != stop)
    *r++ = 1.0 / *s++;
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union512d src, res1, res2, res3;
|
||||
__mmask8 mask = MASK_VALUE;
|
||||
double res_ref[8];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
src.a[i] = 179.345 - 6.5645 * i;
|
||||
res2.a[i] = DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
res1.x = _mm512_rcp28_pd (src.x);
|
||||
res2.x = _mm512_mask_rcp28_pd (res2.x, mask, src.x);
|
||||
res3.x = _mm512_maskz_rcp28_pd (mask, src.x);
|
||||
|
||||
compute_vrcp28pd (src.a, res_ref);
|
||||
|
||||
if (check_rough_union512d (res1, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (d) (res_ref, mask, 8);
|
||||
if (check_rough_union512d (res2, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (d) (res_ref, mask, 8);
|
||||
if (check_rough_union512d (res3, res_ref, 0.0001))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512 x;
|
||||
volatile __mmask16 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm512_rcp28_ps (x);
|
||||
x = _mm512_mask_rcp28_ps (x, m, x);
|
||||
x = _mm512_maskz_rcp28_ps (m, x);
|
||||
x = _mm512_rcp28_round_ps (x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_mask_rcp28_round_ps (x, m, x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_maskz_rcp28_round_ps (m, x, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
/* Reference for vrcp28ps: store the reciprocal of S[k] into R[k] for
   the 16 float lanes.  The division is done in double (1.0 is a double
   constant) and narrowed on the store.  */
static void
compute_vrcp28ps (float *s, float *r)
{
  float *const stop = s + 16;

  while (s != stop)
    *r++ = 1.0 / *s++;
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union512 src, res1, res2, res3;
|
||||
__mmask16 mask = MASK_VALUE;
|
||||
float res_ref[16];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
src.a[i] = 179.345 - 6.5645 * i;
|
||||
res2.a[i] = DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
res1.x = _mm512_rcp28_ps (src.x);
|
||||
res2.x = _mm512_mask_rcp28_ps (res2.x, mask, src.x);
|
||||
res3.x = _mm512_maskz_rcp28_ps (mask, src.x);
|
||||
|
||||
compute_vrcp28ps (src.a, res_ref);
|
||||
|
||||
if (check_rough_union512 (res1, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE ()(res_ref, mask, 16);
|
||||
if (check_rough_union512 (res2, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO ()(res_ref, mask, 16);
|
||||
if (check_rough_union512 (res3, res_ref, 0.0001))
|
||||
abort ();
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
|
||||
#define MAX 1000
|
||||
#define EPS 0.00001
|
||||
|
||||
__attribute__ ((noinline, optimize (0)))
|
||||
void static
|
||||
compute_rcp_ref (float *a, float *b, float *r)
|
||||
{
|
||||
for (int i = 0; i < MAX; i++)
|
||||
r[i] = a[i] / b[i];
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void static
|
||||
compute_rcp_exp (float *a, float *b, float *r)
|
||||
{
|
||||
for (int i = 0; i < MAX; i++)
|
||||
r[i] = a[i] / b[i];
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
float a[MAX];
|
||||
float b[MAX];
|
||||
float ref[MAX];
|
||||
float exp[MAX];
|
||||
|
||||
for (int i = 0; i < MAX; i++)
|
||||
{
|
||||
a[i] = 179.345 - 6.5645 * i;
|
||||
b[i] = 8765.987 - 8.6756 * i;
|
||||
}
|
||||
|
||||
compute_rcp_ref (a, b, ref);
|
||||
compute_rcp_exp (a, b, exp);
|
||||
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < MAX; i++)
|
||||
{
|
||||
float rel_err = (ref[i] - exp[i]) / ref[i];
|
||||
rel_err = rel_err > 0.0 ? rel_err : -rel_err;
|
||||
if (rel_err > EPS)
|
||||
abort ();
|
||||
}
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-vrcp28ps-3.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m128d x, y, z;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm_rcp28_sd (x, y);
|
||||
x = _mm_rcp28_round_sd (x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_mask_rcp28_sd (z, m, x, y);
|
||||
x = _mm_mask_rcp28_round_sd (z, m, x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_maskz_rcp28_sd (m, x, y);
|
||||
x = _mm_maskz_rcp28_round_sd (m, x, y, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
#define IMM 0x23
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union128d src1, src2, res, res1, res2, res3, res4;
|
||||
double res_ref[2];
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
src1.a[i] = 179.345 - 6.5645 * i;
|
||||
src2.a[i] = 204179.345 + 6.5645 * i;
|
||||
res_ref[i] = src1.a[i];
|
||||
}
|
||||
|
||||
res_ref[0] = 1.0 / src2.a[0];
|
||||
|
||||
res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||
res1.x = _mm_mask_rcp28_sd (src1.x, IMM, src1.x, src2.x);
|
||||
res2.x = _mm_mask_rcp28_round_sd (src1.x, IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
res3.x = _mm_maskz_rcp28_sd (IMM, src1.x, src2.x);
|
||||
res4.x = _mm_maskz_rcp28_round_sd (IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
|
||||
|
||||
if (checkVd (res.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (d) (res_ref, mask, 1);
|
||||
|
||||
if (checkVd (res1.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVd (res2.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (d) (res_ref, mask, 1);
|
||||
|
||||
if (checkVd (res3.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVd (res4.a, res_ref, 2))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m128 x, y, z;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm_rcp28_ss (x, y);
|
||||
x = _mm_rcp28_round_ss (x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_mask_rcp28_ss (z, m, x, y);
|
||||
x = _mm_mask_rcp28_round_ss (z, m, x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_maskz_rcp28_ss (m, x, y);
|
||||
x = _mm_maskz_rcp28_round_ss (m, x, y, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
#define IMM 0x23
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union128 src1, src2, res, res1, res2, res3, res4;
|
||||
float res_ref[4];
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
src1.a[i] = 179.345 - 6.5645 * i;
|
||||
src2.a[i] = 179345.006 + 6.5645 * i;
|
||||
res_ref[i] = src1.a[i];
|
||||
}
|
||||
|
||||
res_ref[0] = 1.0 / src2.a[0];
|
||||
|
||||
res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||
res1.x = _mm_mask_rcp28_ss (src1.x, IMM, src1.x, src2.x);
|
||||
res2.x = _mm_mask_rcp28_round_ss (src1.x, IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
res3.x = _mm_maskz_rcp28_ss (IMM, src1.x, src2.x);
|
||||
res4.x = _mm_maskz_rcp28_round_ss (IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
|
||||
if (checkVf (res.a, res_ref, 4))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE () (res_ref, mask, 1);
|
||||
|
||||
if (checkVf (res1.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVf (res2.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO () (res_ref, mask, 1);
|
||||
|
||||
if (checkVf (res3.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVf (res4.a, res_ref, 2))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512d x;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm512_rsqrt28_pd (x);
|
||||
x = _mm512_mask_rsqrt28_pd (x, m, x);
|
||||
x = _mm512_maskz_rsqrt28_pd (m, x);
|
||||
x = _mm512_rsqrt28_round_pd (x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_mask_rsqrt28_round_pd (x, m, x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_maskz_rsqrt28_round_pd (m, x, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
void static
|
||||
compute_vrsqrt28pd (double *s, double *r)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 8; i++)
|
||||
r[i] = 1.0 / sqrt (s[i]);
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union512d src, res1, res2, res3;
|
||||
__mmask8 mask = MASK_VALUE;
|
||||
double res_ref[8];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
src.a[i] = 179.345 - 6.5645 * i;
|
||||
res2.a[i] = DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
res1.x = _mm512_rsqrt28_pd (src.x);
|
||||
res2.x = _mm512_mask_rsqrt28_pd (res2.x, mask, src.x);
|
||||
res3.x = _mm512_maskz_rsqrt28_pd (mask, src.x);
|
||||
|
||||
compute_vrsqrt28pd (src.a, res_ref);
|
||||
|
||||
if (check_rough_union512d (res1, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (d) (res_ref, mask, 8);
|
||||
if (check_rough_union512d (res2, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (d) (res_ref, mask, 8);
|
||||
if (check_rough_union512d (res3, res_ref, 0.0001))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512 x;
|
||||
volatile __mmask16 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm512_rsqrt28_ps (x);
|
||||
x = _mm512_mask_rsqrt28_ps (x, m, x);
|
||||
x = _mm512_maskz_rsqrt28_ps (m, x);
|
||||
x = _mm512_rsqrt28_round_ps (x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_mask_rsqrt28_round_ps (x, m, x, _MM_FROUND_NO_EXC);
|
||||
x = _mm512_maskz_rsqrt28_round_ps (m, x, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
void static
|
||||
compute_vrsqrt28ps (float *s, float *r)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
r[i] = 1.0 / sqrt (s[i]);
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union512 src, res1, res2, res3;
|
||||
__mmask16 mask = MASK_VALUE;
|
||||
float res_ref[16];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
src.a[i] = 179.345 - 6.5645 * i;
|
||||
res2.a[i] = DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
res1.x = _mm512_rsqrt28_ps (src.x);
|
||||
res2.x = _mm512_mask_rsqrt28_ps (res2.x, mask, src.x);
|
||||
res3.x = _mm512_maskz_rsqrt28_ps (mask, src.x);
|
||||
|
||||
compute_vrsqrt28ps (src.a, res_ref);
|
||||
|
||||
if (check_rough_union512 (res1, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE ()(res_ref, mask, 16);
|
||||
if (check_rough_union512 (res2, res_ref, 0.0001))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO ()(res_ref, mask, 16);
|
||||
if (check_rough_union512 (res3, res_ref, 0.0001))
|
||||
abort ();
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include <math.h>
|
||||
#include "avx512er-check.h"
|
||||
|
||||
#define MAX 1000
|
||||
#define EPS 0.00001
|
||||
|
||||
__attribute__ ((noinline, optimize (1, "-fno-fast-math")))
|
||||
void static
|
||||
compute_rsqrt_ref (float *a, float *r)
|
||||
{
|
||||
for (int i = 0; i < MAX; i++)
|
||||
r[i] = 1.0 / sqrtf (a[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void static
|
||||
compute_rsqrt_exp (float *a, float *r)
|
||||
{
|
||||
for (int i = 0; i < MAX; i++)
|
||||
r[i] = 1.0 / sqrtf (a[i]);
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
float in[MAX];
|
||||
float ref[MAX];
|
||||
float exp[MAX];
|
||||
|
||||
for (int i = 0; i < MAX; i++)
|
||||
in[i] = 8765.987 - 8.6756 * i;
|
||||
|
||||
compute_rsqrt_ref (in, ref);
|
||||
compute_rsqrt_exp (in, exp);
|
||||
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < MAX; i++)
|
||||
{
|
||||
float rel_err = (ref[i] - exp[i]) / ref[i];
|
||||
rel_err = rel_err > 0.0 ? rel_err : -rel_err;
|
||||
if (rel_err > EPS)
|
||||
abort ();
|
||||
}
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-vrsqrt28ps-3.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vrcp28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" } } */
|
|
@ -1,49 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include <math.h>
|
||||
#include "avx512er-check.h"
|
||||
|
||||
#define MAX 1000
|
||||
#define EPS 0.00001
|
||||
|
||||
__attribute__ ((noinline, optimize (1, "-fno-fast-math")))
|
||||
void static
|
||||
compute_sqrt_ref (float *a, float *r)
|
||||
{
|
||||
for (int i = 0; i < MAX; i++)
|
||||
r[i] = sqrtf (a[i]);
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void static
|
||||
compute_sqrt_exp (float *a, float *r)
|
||||
{
|
||||
for (int i = 0; i < MAX; i++)
|
||||
r[i] = sqrtf (a[i]);
|
||||
}
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
float in[MAX];
|
||||
float ref[MAX];
|
||||
float exp[MAX];
|
||||
|
||||
for (int i = 0; i < MAX; i++)
|
||||
in[i] = 8765.987 - 8.6756 * i;
|
||||
|
||||
compute_sqrt_ref (in, ref);
|
||||
compute_sqrt_exp (in, exp);
|
||||
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < MAX; i++)
|
||||
{
|
||||
float rel_err = (ref[i] - exp[i]) / ref[i];
|
||||
rel_err = rel_err > 0.0 ? rel_err : -rel_err;
|
||||
if (rel_err > EPS)
|
||||
abort ();
|
||||
}
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-vrsqrt28ps-5.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrcp28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m128d x, y, z;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm_rsqrt28_sd (x, y);
|
||||
x = _mm_rsqrt28_round_sd (x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_mask_rsqrt28_sd (z, m, x, y);
|
||||
x = _mm_mask_rsqrt28_round_sd (z, m, x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_maskz_rsqrt28_sd (m, x, y);
|
||||
x = _mm_maskz_rsqrt28_round_sd (m, x, y, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
#define IMM 0x23
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union128d src1, src2, res, res1, res2, res3, res4;
|
||||
double res_ref[2];
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
src1.a[i] = 179.345 - 6.5645 * i;
|
||||
src2.a[i] = 45 - 6.5645 * i;
|
||||
res_ref[i] = src1.a[i];
|
||||
}
|
||||
|
||||
res_ref[0] = 1.0 / sqrt (src2.a[0]);
|
||||
|
||||
res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||
res1.x = _mm_mask_rsqrt28_sd (src1.x, IMM, src1.x, src2.x);
|
||||
res2.x = _mm_mask_rsqrt28_round_sd (src1.x, IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
res3.x = _mm_maskz_rsqrt28_sd (IMM, src1.x, src2.x);
|
||||
res4.x = _mm_maskz_rsqrt28_round_sd (IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
|
||||
if (checkVd (res.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE (d) (res_ref, mask, 1);
|
||||
|
||||
if (checkVd (res1.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVd (res2.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO (d) (res_ref, mask, 1);
|
||||
|
||||
if (checkVd (res3.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVd (res4.a, res_ref, 2))
|
||||
abort ();
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512er -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m128 x, y, z;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avx512er_test (void)
|
||||
{
|
||||
x = _mm_rsqrt28_ss (x, y);
|
||||
x = _mm_rsqrt28_round_ss (x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_mask_rsqrt28_ss (z, m, x, y);
|
||||
x = _mm_mask_rsqrt28_round_ss (z, m, x, y, _MM_FROUND_NO_EXC);
|
||||
x = _mm_maskz_rsqrt28_ss (m, x, y);
|
||||
x = _mm_maskz_rsqrt28_round_ss (m, x, y, _MM_FROUND_NO_EXC);
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512er } */
|
||||
/* { dg-options "-O2 -mavx512er" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "avx512er-check.h"
|
||||
#include "avx512f-mask-type.h"
|
||||
#include "avx512f-helper.h"
|
||||
#include <math.h>
|
||||
|
||||
#define IMM 0x23
|
||||
|
||||
void static
|
||||
avx512er_test (void)
|
||||
{
|
||||
union128 src1, src2, res, res1, res2, res3, res4;
|
||||
float res_ref[4];
|
||||
MASK_TYPE mask = MASK_VALUE;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
src1.a[i] = 179.345 - 6.5645 * i;
|
||||
src2.a[i] = 179221345 + 6.5645 * i;
|
||||
res_ref[i] = src1.a[i];
|
||||
}
|
||||
|
||||
res_ref[0] = 1.0 / sqrt (src2.a[0]);
|
||||
|
||||
res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||
res1.x = _mm_mask_rsqrt28_ss (src1.x, IMM, src1.x, src2.x);
|
||||
res2.x = _mm_mask_rsqrt28_round_ss (src1.x, IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
res3.x = _mm_maskz_rsqrt28_ss (IMM, src1.x, src2.x);
|
||||
res4.x = _mm_maskz_rsqrt28_round_ss (IMM, src1.x, src2.x,
|
||||
_MM_FROUND_TO_NEAREST_INT
|
||||
| _MM_FROUND_NO_EXC);
|
||||
|
||||
if (checkVf (res.a, res_ref, 4))
|
||||
abort ();
|
||||
|
||||
MASK_MERGE () (res_ref, mask, 1);
|
||||
|
||||
if (checkVf (res1.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVf (res2.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
MASK_ZERO () (res_ref, mask, 1);
|
||||
|
||||
if (checkVf (res3.a, res_ref, 2))
|
||||
abort ();
|
||||
|
||||
if (checkVf (res4.a, res_ref, 2))
|
||||
abort ();
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m256i idx;
|
||||
volatile __mmask8 m8;
|
||||
void *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask16 m16;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m256i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T1);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask16 m16;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T1);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T1);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T1);
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf0dpd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m256i idx;
|
||||
volatile __mmask8 m8;
|
||||
void *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32scatter_pd (base, idx, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, _MM_HINT_ET0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf0dps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask16 m16;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32scatter_ps (base, idx, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i32scatter_ps (base, m16, idx, 8, _MM_HINT_ET0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf0qpd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
void *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64scatter_pd (base, idx, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, _MM_HINT_ET0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf0qps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64scatter_ps (base, idx, 8, _MM_HINT_T0);
|
||||
_mm512_mask_prefetch_i64scatter_ps (base, m8, idx, 8, _MM_HINT_ET0);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf1dpd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m256i idx;
|
||||
volatile __mmask8 m8;
|
||||
void *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32scatter_pd (base, idx, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, _MM_HINT_ET1);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf1dps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask16 m16;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32scatter_ps (base, idx, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i32scatter_ps (base, m16, idx, 8, _MM_HINT_ET1);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf1qpd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64scatter_pd (base, idx, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, _MM_HINT_ET1);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512pf -O2" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-times "vscatterpf1qps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m512i idx;
|
||||
volatile __mmask8 m8;
|
||||
int *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i64scatter_ps (base, idx, 8, _MM_HINT_T1);
|
||||
_mm512_mask_prefetch_i64scatter_ps (base, m8, idx, 8, _MM_HINT_ET1);
|
||||
}
|
|
@ -89,10 +89,6 @@ quick_check ()
|
|||
|
||||
assert (__builtin_cpu_supports ("avx512f") >= 0);
|
||||
|
||||
assert (__builtin_cpu_supports ("avx5124vnniw") >= 0);
|
||||
|
||||
assert (__builtin_cpu_supports ("avx5124fmaps") >= 0);
|
||||
|
||||
assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
|
||||
|
||||
assert (__builtin_cpu_supports ("x86-64") >= 0);
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
/* PR target/104448 */
|
||||
/* { dg-do compile { target { *-*-linux* && lp64 } } } */
|
||||
/* { dg-options "-mavx5124vnniw -mno-xsave -mabi=ms" } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return 0;
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=knl" } */
|
||||
/* { dg-warning "'-march=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "pr82941-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not "vzeroupper" } } */
|
|
@ -1,8 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "'-mtune=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "pr82941-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not "vzeroupper" } } */
|
|
@ -1,15 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=knl -mvzeroupper" } */
|
||||
/* { dg-warning "'-march=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
extern __m512d y, z;
|
||||
|
||||
void
|
||||
pr82941 ()
|
||||
{
|
||||
z = y;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
|
|
@ -1,7 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "pr82941-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
|
|
@ -1,7 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */
|
||||
/* { dg-warning "'-mtune=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include "pr82941-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not "vzeroupper" } } */
|
|
@ -1,6 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */
|
||||
|
||||
#include "pr82941-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not "vzeroupper" } } */
|
|
@ -1,40 +0,0 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-require-effective-target maybe_x32 } */
|
||||
/* { dg-options "-mx32 -O2 -mavx512pf" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler "\tvgather" } } */
|
||||
/* { dg-final { scan-assembler-not "addr32 vgather" } } */
|
||||
|
||||
typedef int __v8si __attribute__ ((__vector_size__ (32)));
|
||||
typedef long long __m256i __attribute__ ((__vector_size__ (32),
|
||||
__may_alias__));
|
||||
typedef unsigned char __mmask8;
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
volatile __m256i idx;
|
||||
volatile __mmask8 m8;
|
||||
void *base;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32gather_pd (idx, base, 8, 3);
|
||||
_mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3);
|
||||
}
|
|
@ -1,39 +0,0 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-require-effective-target maybe_x32 } */
|
||||
/* { dg-options "-mx32 -O2 -mavx512pf" } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler-not "\tvgather" } } */
|
||||
/* { dg-final { scan-assembler "addr32 vgather" } } */
|
||||
|
||||
typedef int __v8si __attribute__ ((__vector_size__ (32)));
|
||||
typedef long long __m256i __attribute__ ((__vector_size__ (32),
|
||||
__may_alias__));
|
||||
typedef unsigned char __mmask8;
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
volatile __m256i idx;
|
||||
volatile __mmask8 m8;
|
||||
|
||||
void extern
|
||||
avx512pf_test (void)
|
||||
{
|
||||
_mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3);
|
||||
_mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3);
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
/* PR tree-optimization/91033 */
|
||||
/* { dg-do compile { target pthread } } */
|
||||
/* { dg-options "-march=knl -O2 -fopenmp-simd -ftree-parallelize-loops=2" } */
|
||||
/* { dg-warning "'-march=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#define N 1024
|
||||
int a[N];
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd simdlen (4)
|
||||
for (i = 0; i < N; ++i)
|
||||
a[i] = a[i] + 1;
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mprefetchwt1 -O2" } */
|
||||
/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-final { scan-assembler "\[ \\t\]+prefetchwt1\[ \\t\]+" } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
void *p;
|
||||
|
||||
void extern
|
||||
prefetchw__test (void)
|
||||
{
|
||||
_mm_prefetch (p, _MM_HINT_ET1);
|
||||
}
|
||||
|
|
@ -3,12 +3,7 @@
|
|||
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512cd -msha -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512cd -msha -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue