i386.c (ix86_size_cost, [...]): Set reassociation width to 1.
* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost, lakemont_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost, pentium4_cost, nocona_cost): Set reassociation width to 1. (bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation width to 2 for fp operations and 1 otherwise. (znver1_cost): Set scalar reassoc width to 4 and vector to 3 and 6 for int and fp. (atom_cost): Set reassociation width to 2. (slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise. (intel_cost): Set fp reassociation width to 4 and 1 otherwise. (core_cost): Set fp reassociation width to 4 and vector to 2. (ix86_reassociation_width): Rewrite using cost table; special case plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS and TARGET_AVX128_OPTIMAL. * i386.h (processor_costs): Add reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp. (TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL, TARGET_REASSOC_FP_TO_PARALLEL): Remove. * x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove. (X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove. (X86_TUNE_VECTOR_PARALLEL_EXECUTION): Remove. From-SVN: r253448
This commit is contained in:
parent
807e3be2b8
commit
a813c28053
4 changed files with 97 additions and 48 deletions
|
@ -1,3 +1,28 @@
|
|||
2017-10-05 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost,
|
||||
lakemont_cost, pentiumpro_cost, geode_cost, k6_cost,
|
||||
athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost,
|
||||
pentium4_cost, nocona_cost): Set reassociation width to 1.
|
||||
(bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation
|
||||
width to 2 for fp operations and 1 otherwise.
|
||||
(znver1_cost): Set scalar reassoc width to 4 and vector to 3 and 6
|
||||
for int and fp.
|
||||
(atom_cost): Set reassociation width to 2.
|
||||
(slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise.
|
||||
(intel_cost): Set fp reassociation width to 4 and 1 otherwise.
|
||||
(core_cost): Set fp reassociation width to 4 and vector to 2.
|
||||
(ix86_reassociation_width): Rewrite using cost table; special case
|
||||
plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS
|
||||
and TARGET_AVX128_OPTIMAL.
|
||||
* i386.h (processor_costs): Add
|
||||
reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp.
|
||||
(TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL,
|
||||
TARGET_REASSOC_FP_TO_PARALLEL): Remove.
|
||||
* x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove.
|
||||
(X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove.
|
||||
(X86_TUNE_VECTOR_PARALLEL_EXECUTION): Remove.
|
||||
|
||||
2017-10-05 Nathan Sidwell <nathan@acm.org>
|
||||
|
||||
* doc/invoke.texi (Wparentheses): Document C++ MVP behaviour.
|
||||
|
|
|
@ -177,6 +177,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
|
|||
COSTS_N_BYTES (2), /* cost of FABS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
ix86_size_memcpy,
|
||||
ix86_size_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -253,6 +254,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
|||
COSTS_N_INSNS (22), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (24), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
i386_memcpy,
|
||||
i386_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -330,6 +332,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
|||
COSTS_N_INSNS (3), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
i486_memcpy,
|
||||
i486_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -405,6 +408,7 @@ struct processor_costs pentium_cost = {
|
|||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentium_memcpy,
|
||||
pentium_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -473,6 +477,7 @@ struct processor_costs lakemont_cost = {
|
|||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentium_memcpy,
|
||||
pentium_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -556,6 +561,7 @@ struct processor_costs pentiumpro_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentiumpro_memcpy,
|
||||
pentiumpro_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -631,6 +637,7 @@ struct processor_costs geode_cost = {
|
|||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
geode_memcpy,
|
||||
geode_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -708,6 +715,7 @@ struct processor_costs k6_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
k6_memcpy,
|
||||
k6_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -785,6 +793,7 @@ struct processor_costs athlon_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
athlon_memcpy,
|
||||
athlon_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -871,7 +880,7 @@ struct processor_costs k8_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
k8_memcpy,
|
||||
k8_memset,
|
||||
4, /* scalar_stmt_cost. */
|
||||
|
@ -965,7 +974,7 @@ struct processor_costs amdfam10_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
amdfam10_memcpy,
|
||||
amdfam10_memset,
|
||||
4, /* scalar_stmt_cost. */
|
||||
|
@ -1060,7 +1069,7 @@ const struct processor_costs bdver1_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver1_memcpy,
|
||||
bdver1_memset,
|
||||
6, /* scalar_stmt_cost. */
|
||||
|
@ -1156,7 +1165,7 @@ const struct processor_costs bdver2_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver2_memcpy,
|
||||
bdver2_memset,
|
||||
6, /* scalar_stmt_cost. */
|
||||
|
@ -1243,7 +1252,7 @@ struct processor_costs bdver3_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver3_memcpy,
|
||||
bdver3_memset,
|
||||
6, /* scalar_stmt_cost. */
|
||||
|
@ -1329,7 +1338,7 @@ struct processor_costs bdver4_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver4_memcpy,
|
||||
bdver4_memset,
|
||||
6, /* scalar_stmt_cost. */
|
||||
|
@ -1419,7 +1428,15 @@ struct processor_costs znver1_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
|
||||
and it can execute 2 integer additions and 2 multiplications thus
|
||||
reassociation may make sense up to width of 6. SPEC2k6 benchmarks suggest
|
||||
that 4 works better than 6 probably due to register pressure.
|
||||
|
||||
Integer vector operations are taken by FP unit and execute 3 vector
|
||||
plus/minus operations per cycle but only one multiply. This is adjusted
|
||||
in ix86_reassociation_width. */
|
||||
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
znver1_memcpy,
|
||||
znver1_memset,
|
||||
6, /* scalar_stmt_cost. */
|
||||
|
@ -1508,7 +1525,7 @@ const struct processor_costs btver1_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
btver1_memcpy,
|
||||
btver1_memset,
|
||||
4, /* scalar_stmt_cost. */
|
||||
|
@ -1594,6 +1611,7 @@ const struct processor_costs btver2_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
btver2_memcpy,
|
||||
btver2_memset,
|
||||
4, /* scalar_stmt_cost. */
|
||||
|
@ -1670,6 +1688,7 @@ struct processor_costs pentium4_cost = {
|
|||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentium4_memcpy,
|
||||
pentium4_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -1749,6 +1768,7 @@ struct processor_costs nocona_cost = {
|
|||
COSTS_N_INSNS (3), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
nocona_memcpy,
|
||||
nocona_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -1826,6 +1846,7 @@ struct processor_costs atom_cost = {
|
|||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
atom_memcpy,
|
||||
atom_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -1903,6 +1924,7 @@ struct processor_costs slm_cost = {
|
|||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
slm_memcpy,
|
||||
slm_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -1980,6 +2002,7 @@ struct processor_costs intel_cost = {
|
|||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
intel_memcpy,
|
||||
intel_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -2067,6 +2090,7 @@ struct processor_costs generic_cost = {
|
|||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
generic_memcpy,
|
||||
generic_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -2153,6 +2177,7 @@ struct processor_costs core_cost = {
|
|||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
core_memcpy,
|
||||
core_memset,
|
||||
1, /* scalar_stmt_cost. */
|
||||
|
@ -51830,34 +51855,47 @@ has_dispatch (rtx_insn *insn, int action)
|
|||
/* Implementation of reassociation_width target hook used by
|
||||
reassoc phase to identify parallelism level in reassociated
|
||||
tree. Statement's tree_code is passed in OPC. Argument's type
|
||||
is passed in MODE.
|
||||
|
||||
Currently parallel reassociation is enabled for Atom
|
||||
processors only and we set reassociation width to be 2
|
||||
because Atom may issue up to 2 instructions per cycle.
|
||||
|
||||
Return value should be fixed if parallel reassociation is
|
||||
enabled for other processors. */
|
||||
is passed in MODE. */
|
||||
|
||||
static int
|
||||
ix86_reassociation_width (unsigned int, machine_mode mode)
|
||||
ix86_reassociation_width (unsigned int op, machine_mode mode)
|
||||
{
|
||||
int width = 1;
|
||||
/* Vector part. */
|
||||
if (VECTOR_MODE_P (mode))
|
||||
{
|
||||
if (TARGET_VECTOR_PARALLEL_EXECUTION)
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
int div = 1;
|
||||
if (INTEGRAL_MODE_P (mode))
|
||||
width = ix86_cost->reassoc_vec_int;
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
width = ix86_cost->reassoc_vec_fp;
|
||||
|
||||
if (width == 1)
|
||||
return 1;
|
||||
|
||||
/* Integer vector instructions execute in FP unit
|
||||
and can execute 3 additions and one multiplication per cycle. */
|
||||
if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
|
||||
&& op != PLUS && op != MINUS)
|
||||
return 1;
|
||||
|
||||
/* Account for targets that split wide vectors into multiple parts. */
|
||||
if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
|
||||
div = GET_MODE_BITSIZE (mode) / 128;
|
||||
else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
|
||||
div = GET_MODE_BITSIZE (mode) / 64;
|
||||
width = (width + div - 1) / div;
|
||||
}
|
||||
/* Scalar part. */
|
||||
if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
|
||||
return 2;
|
||||
else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
|
||||
return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
|
||||
else
|
||||
return 1;
|
||||
else if (INTEGRAL_MODE_P (mode))
|
||||
width = ix86_cost->reassoc_int;
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
width = ix86_cost->reassoc_fp;
|
||||
|
||||
/* Avoid using too many registers in 32bit mode. */
|
||||
if (!TARGET_64BIT && width > 2)
|
||||
width = 2;
|
||||
return width;
|
||||
}
|
||||
|
||||
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
|
||||
|
|
|
@ -257,6 +257,13 @@ struct processor_costs {
|
|||
const int fsqrt; /* cost of FSQRT instruction. */
|
||||
/* Specify what algorithm
|
||||
to use for stringops on unknown size. */
|
||||
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
|
||||
/* Specify reassociation width for integer,
|
||||
fp, vector integer and vector fp
|
||||
operations. Generally should correspond
|
||||
to number of instructions executed in
|
||||
parallel. See also
|
||||
ix86_reassociation_width. */
|
||||
struct stringop_algs *memcpy, *memset;
|
||||
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
|
||||
load and store. */
|
||||
|
@ -466,8 +473,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
|||
ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
|
||||
#define TARGET_SLOW_PSHUFB \
|
||||
ix86_tune_features[X86_TUNE_SLOW_PSHUFB]
|
||||
#define TARGET_VECTOR_PARALLEL_EXECUTION \
|
||||
ix86_tune_features[X86_TUNE_VECTOR_PARALLEL_EXECUTION]
|
||||
#define TARGET_AVOID_4BYTE_PREFIXES \
|
||||
ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES]
|
||||
#define TARGET_FUSE_CMP_AND_BRANCH_32 \
|
||||
|
@ -488,10 +493,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
|||
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
|
||||
#define TARGET_AVX128_OPTIMAL \
|
||||
ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
|
||||
#define TARGET_REASSOC_INT_TO_PARALLEL \
|
||||
ix86_tune_features[X86_TUNE_REASSOC_INT_TO_PARALLEL]
|
||||
#define TARGET_REASSOC_FP_TO_PARALLEL \
|
||||
ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
|
||||
#define TARGET_GENERAL_REGS_SSE_SPILL \
|
||||
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
|
||||
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
|
||||
|
|
|
@ -117,16 +117,6 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
|
|||
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
|
||||
m_SANDYBRIDGE | m_HASWELL)
|
||||
|
||||
/* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
|
||||
during reassociation of integer computation. */
|
||||
DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
|
||||
m_BONNELL)
|
||||
|
||||
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
|
||||
during reassociation of fp computation. */
|
||||
DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
|
||||
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1
|
||||
| m_BDVER2 | m_ZNVER1 | m_GENERIC)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Function prologue, epilogue and function calling sequences. */
|
||||
|
@ -391,11 +381,6 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
|
|||
DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
|
||||
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
|
||||
|
||||
/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
|
||||
execute 2 or more vector instructions in parallel. */
|
||||
DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
|
||||
|
||||
/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
|
||||
m_SILVERMONT | m_INTEL)
|
||||
|
|
Loading…
Add table
Reference in a new issue