i386.c: Tidy processor feature bitmasks.
* config/i386/i386.c: Tidy processor feature bitmasks. (m_P4_NOCONA): New. From-SVN: r176215
This commit is contained in:
parent
10b75750f2
commit
3a4ffde68c
3 changed files with 62 additions and 70 deletions
|
@ -1,3 +1,8 @@
|
|||
2011-07-12 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.c: Tidy processor feature bitmasks.
|
||||
(m_P4_NOCONA): New.
|
||||
|
||||
2011-07-12 Andrew Pinski <pinskia@gmail.com>
|
||||
|
||||
PR rtl-opt/49474
|
||||
|
|
|
@ -1880,30 +1880,31 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
|||
#define m_486 (1<<PROCESSOR_I486)
|
||||
#define m_PENT (1<<PROCESSOR_PENTIUM)
|
||||
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
|
||||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||||
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
|
||||
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
|
||||
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
|
||||
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
|
||||
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
|
||||
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
|
||||
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
|
||||
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
|
||||
#define m_ATOM (1<<PROCESSOR_ATOM)
|
||||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||||
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
|
||||
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
|
||||
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
|
||||
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
|
||||
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
|
||||
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
|
||||
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
|
||||
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
|
||||
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
|
||||
#define m_ATOM (1<<PROCESSOR_ATOM)
|
||||
|
||||
#define m_GEODE (1<<PROCESSOR_GEODE)
|
||||
#define m_K6 (1<<PROCESSOR_K6)
|
||||
#define m_K6_GEODE (m_K6 | m_GEODE)
|
||||
#define m_K8 (1<<PROCESSOR_K8)
|
||||
#define m_ATHLON (1<<PROCESSOR_ATHLON)
|
||||
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
|
||||
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
|
||||
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
|
||||
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
|
||||
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
|
||||
#define m_BDVER (m_BDVER1 | m_BDVER2)
|
||||
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
|
||||
#define m_GEODE (1<<PROCESSOR_GEODE)
|
||||
#define m_K6 (1<<PROCESSOR_K6)
|
||||
#define m_K6_GEODE (m_K6 | m_GEODE)
|
||||
#define m_K8 (1<<PROCESSOR_K8)
|
||||
#define m_ATHLON (1<<PROCESSOR_ATHLON)
|
||||
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
|
||||
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
|
||||
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
|
||||
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
|
||||
#define m_BDVER (m_BDVER1 | m_BDVER2)
|
||||
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
|
||||
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
|
||||
|
||||
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
|
||||
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
|
||||
|
@ -1922,18 +1923,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
negatively, so enabling for Generic64 seems like good code size
|
||||
tradeoff. We can't enable it for 32bit generic because it does not
|
||||
work well with PPro based chips. */
|
||||
m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
|
||||
m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
|
||||
|
||||
/* X86_TUNE_PUSH_MEMORY */
|
||||
m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
|
||||
| m_NOCONA | m_CORE2I7 | m_GENERIC,
|
||||
m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_ZERO_EXTEND_WITH_AND */
|
||||
m_486 | m_PENT,
|
||||
|
||||
/* X86_TUNE_UNROLL_STRLEN */
|
||||
m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
|
||||
| m_CORE2I7 | m_GENERIC,
|
||||
m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
|
||||
on simulation result. But after P4 was made, no performance benefit
|
||||
|
@ -1945,13 +1944,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
~m_386,
|
||||
|
||||
/* X86_TUNE_USE_SAHF */
|
||||
m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1
|
||||
| m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
|
||||
partial dependencies. */
|
||||
m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
|
||||
| m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
|
||||
register stalls on Generic32 compilation setting as well. However
|
||||
|
@ -1970,13 +1967,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
m_386 | m_486 | m_K6_GEODE,
|
||||
|
||||
/* X86_TUNE_USE_SIMODE_FIOP */
|
||||
~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
|
||||
~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
|
||||
|
||||
/* X86_TUNE_USE_MOV0 */
|
||||
m_K6,
|
||||
|
||||
/* X86_TUNE_USE_CLTD */
|
||||
~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
|
||||
~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
|
||||
|
||||
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
|
||||
m_PENT4,
|
||||
|
@ -1991,14 +1988,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
~(m_PENT | m_PPRO),
|
||||
|
||||
/* X86_TUNE_PROMOTE_QIMODE */
|
||||
m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
|
||||
| m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
|
||||
m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_FAST_PREFIX */
|
||||
~(m_PENT | m_486 | m_386),
|
||||
~(m_386 | m_486 | m_PENT),
|
||||
|
||||
/* X86_TUNE_SINGLE_STRINGOP */
|
||||
m_386 | m_PENT4 | m_NOCONA,
|
||||
m_386 | m_P4_NOCONA,
|
||||
|
||||
/* X86_TUNE_QIMODE_MATH */
|
||||
~0,
|
||||
|
@ -2033,11 +2029,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
|
||||
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
|
||||
for DFmode copies */
|
||||
~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
|
||||
| m_GENERIC | m_GEODE),
|
||||
~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
|
||||
|
||||
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
|
||||
m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
|
||||
m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
|
||||
conflict here in between PPro/Pentium4 based chips that treat 128bit
|
||||
|
@ -2048,14 +2043,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
shows that disabling this option on P4 brings over 20% SPECfp regression,
|
||||
while enabling it on K8 brings roughly 2.4% regression that can be partly
|
||||
masked by careful scheduling of moves. */
|
||||
m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC | m_AMDFAM10
|
||||
| m_BDVER,
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
|
||||
m_AMDFAM10 | m_BDVER | m_BTVER1 | m_COREI7,
|
||||
m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
|
||||
|
||||
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
|
||||
m_BDVER | m_COREI7,
|
||||
m_COREI7 | m_BDVER,
|
||||
|
||||
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
|
||||
m_BDVER ,
|
||||
|
@ -2070,16 +2064,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
m_AMD_MULTIPLE,
|
||||
|
||||
/* X86_TUNE_SSE_LOAD0_BY_PXOR */
|
||||
m_PPRO | m_PENT4 | m_NOCONA,
|
||||
m_PPRO | m_P4_NOCONA,
|
||||
|
||||
/* X86_TUNE_MEMORY_MISMATCH_STALL */
|
||||
m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
|
||||
m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_PROLOGUE_USING_MOVE */
|
||||
m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
|
||||
m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_EPILOGUE_USING_MOVE */
|
||||
m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
|
||||
m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_SHIFT1 */
|
||||
~m_486,
|
||||
|
@ -2095,34 +2089,31 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
|
||||
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
|
||||
than 4 branch instructions in the 16 byte window. */
|
||||
m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
|
||||
| m_GENERIC,
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_SCHEDULE */
|
||||
m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
|
||||
| m_GENERIC,
|
||||
m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_USE_BT */
|
||||
m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
|
||||
m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_USE_INCDEC */
|
||||
~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
|
||||
~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
|
||||
|
||||
/* X86_TUNE_PAD_RETURNS */
|
||||
m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
|
||||
m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
|
||||
m_ATOM,
|
||||
|
||||
/* X86_TUNE_EXT_80387_CONSTANTS */
|
||||
m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
|
||||
| m_CORE2I7 | m_GENERIC,
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_SHORTEN_X87_SSE */
|
||||
~m_K8,
|
||||
|
||||
/* X86_TUNE_AVOID_VECTOR_DECODE */
|
||||
m_K8 | m_CORE2I7_64 | m_GENERIC64,
|
||||
m_CORE2I7_64 | m_K8 | m_GENERIC64,
|
||||
|
||||
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
|
||||
and SImode multiply, but 386 and 486 do HImode multiply faster. */
|
||||
|
@ -2130,11 +2121,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
|
||||
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
|
||||
vector path on AMD machines. */
|
||||
m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER | m_BTVER1,
|
||||
m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
|
||||
|
||||
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
|
||||
machines. */
|
||||
m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER | m_BTVER1,
|
||||
m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
|
||||
|
||||
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
|
||||
than a MOV. */
|
||||
|
@ -2151,7 +2142,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
|
||||
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
|
||||
from FP to FP. */
|
||||
m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
|
||||
m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
|
||||
|
||||
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
|
||||
from integer to FP. */
|
||||
|
@ -2160,7 +2151,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|||
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
|
||||
with a subsequent conditional jump instruction into a single
|
||||
compare-and-branch uop. */
|
||||
m_BDVER ,
|
||||
m_BDVER,
|
||||
|
||||
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
||||
will impact LEA instruction selection. */
|
||||
|
@ -2203,12 +2194,10 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
|
|||
};
|
||||
|
||||
static const unsigned int x86_accumulate_outgoing_args
|
||||
= m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
|
||||
| m_GENERIC;
|
||||
= m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
|
||||
|
||||
static const unsigned int x86_arch_always_fancy_math_387
|
||||
= m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
||||
| m_NOCONA | m_CORE2I7 | m_GENERIC;
|
||||
= m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
|
||||
|
||||
static const unsigned int x86_avx256_split_unaligned_load
|
||||
= m_COREI7 | m_GENERIC;
|
||||
|
|
|
@ -10963,9 +10963,7 @@
|
|||
(set_attr "modrm" "0")])
|
||||
|
||||
(define_expand "indirect_jump"
|
||||
[(set (pc) (match_operand 0 "nonimmediate_operand" ""))]
|
||||
""
|
||||
"")
|
||||
[(set (pc) (match_operand 0 "nonimmediate_operand" ""))])
|
||||
|
||||
(define_insn "*indirect_jump"
|
||||
[(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))]
|
||||
|
|
Loading…
Add table
Reference in a new issue