i386.h (TARGET_SAHF): New define.
* config/i386/i386.h (TARGET_SAHF): New define. * config/i386/i386.c (ix86_tune_features) [X86_TUNE_USE_SAHF]: Also enable for m_K8, m_AMDFAM10 and m_CORE2. (x86_sahf): New global variable. (override_options): Add PTA_NO_SAHF to pta_flags enum. Recode pta_flags masks using shifts. Add PTA_NO_SAHF to x86_64 and nocona processor flags. Set x86_sahf when PTA_NO_SAHF is not set in processor flags. Do not unconditionally disable TARGET_USE_SAHF for 64-bit. (ix86_fp_comparison_sahf_cost): Return high value for !TARGET_SAHF. (ix86_expand_fp_compare): Check for TARGET_CMOVE or TARGET_SAHF when expanding fcomi/sahf based tests. (ix86_emit_fp_unordered_jump): Check for TARGET_SAHF when expanding sahf based alternative. Emit sahf based sequence when optimizing for code size. * config/i386/i386.md (x86_sahf_1): Do not disable for TARGET_64BIT, enable for TARGET_SAHF. From-SVN: r122705
This commit is contained in:
parent
0c705abc64
commit
3c2d980c94
4 changed files with 62 additions and 32 deletions
|
@ -1,3 +1,23 @@
|
|||
2007-03-08 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.h (TARGET_SAHF): New define.
|
||||
* config/i386/i386.c (ix86_tune_features) [X86_TUNE_USE_SAHF]:
|
||||
Also enable for m_K8, m_AMDFAM10 and m_CORE2.
|
||||
(x86_sahf): New global variable.
|
||||
(override_options): Add PTA_NO_SAHF to pta_flags enum. Recode
|
||||
pta_flags masks using shifts. Add PTA_NO_SAHF to x86_64 and
|
||||
nocona processor flags. Set x86_sahf when PTA_NO_SAHF is not set
|
||||
in processor flags. Do not unconditionally disable TARGET_USE_SAHF
|
||||
for 64-bit.
|
||||
(ix86_fp_comparison_sahf_cost): Return high value for !TARGET_SAHF.
|
||||
(ix86_expand_fp_compare): Check for TARGET_CMOVE or TARGET_SAHF
|
||||
when expanding fcomi/sahf based tests.
|
||||
(ix86_emit_fp_unordered_jump): Check for TARGET_SAHF when
|
||||
expanding sahf based alternative. Emit sahf based sequence when
|
||||
optimizing for code size.
|
||||
* config/i386/i386.md (x86_sahf_1): Do not disable for
|
||||
TARGET_64BIT, enable for TARGET_SAHF.
|
||||
|
||||
2007-03-08 Martin Michlmayr <tbm@cyrius.com>
|
||||
|
||||
* tree-ssa-coalesce.c (fail_abnormal_edge_coalesce): Remove
|
||||
|
@ -505,7 +525,7 @@
|
|||
of comparison of non-null ADDR_EXPR against null.
|
||||
|
||||
2007-03-05 Richard Guenther <rguenther@suse.de>
|
||||
Dorit Nuzman <dorit@il.ibm.com>
|
||||
Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
PR tree-optimization/26420
|
||||
* tree-vectorizer.c (vectorize_loops): Bail out early if there
|
||||
|
@ -515,9 +535,9 @@
|
|||
|
||||
2007-03-05 Revital Eres <eres@il.ibm.com>
|
||||
|
||||
* gcc.dg/var-expand1.c: New test.
|
||||
* loop-unroll.c (analyze_insn_to_expand_var): Add dump info
|
||||
when an accumulator is expanded.
|
||||
* gcc.dg/var-expand1.c: New test.
|
||||
* loop-unroll.c (analyze_insn_to_expand_var): Add dump info
|
||||
when an accumulator is expanded.
|
||||
|
||||
2007-03-04 Manuel Lopez-Ibanez <manu@gcc.gnu.org>
|
||||
|
||||
|
|
|
@ -1039,11 +1039,11 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = {
|
|||
~m_386,
|
||||
|
||||
/* X86_TUNE_USE_SAHF */
|
||||
m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
|
||||
/* | m_GENERIC | m_ATHLON_K8 ? */
|
||||
m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
||||
| m_NOCONA | m_CORE2 | m_GENERIC32,
|
||||
|
||||
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
|
||||
partial dependencies */
|
||||
partial dependencies. */
|
||||
m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
|
||||
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
|
||||
|
||||
|
@ -1441,6 +1441,11 @@ int x86_prefetch_sse;
|
|||
/* true if cmpxchg16b is supported. */
|
||||
int x86_cmpxchg16b;
|
||||
|
||||
/* true if sahf is supported. Early Intel CPUs with Intel 64
|
||||
lacked LAHF and SAHF instructions supported by AMD64 until
|
||||
the introduction of the Pentium 4 G1 step in December 2005. */
|
||||
int x86_sahf;
|
||||
|
||||
/* ix86_regparm_string as a number */
|
||||
static int ix86_regparm;
|
||||
|
||||
|
@ -1884,19 +1889,20 @@ override_options (void)
|
|||
const enum processor_type processor;
|
||||
const enum pta_flags
|
||||
{
|
||||
PTA_SSE = 1,
|
||||
PTA_SSE2 = 2,
|
||||
PTA_SSE3 = 4,
|
||||
PTA_MMX = 8,
|
||||
PTA_PREFETCH_SSE = 16,
|
||||
PTA_3DNOW = 32,
|
||||
PTA_3DNOW_A = 64,
|
||||
PTA_64BIT = 128,
|
||||
PTA_SSSE3 = 256,
|
||||
PTA_CX16 = 512,
|
||||
PTA_POPCNT = 1024,
|
||||
PTA_ABM = 2048,
|
||||
PTA_SSE4A = 4096
|
||||
PTA_SSE = 1 << 0,
|
||||
PTA_SSE2 = 1 << 1,
|
||||
PTA_SSE3 = 1 << 2,
|
||||
PTA_MMX = 1 << 3,
|
||||
PTA_PREFETCH_SSE = 1 << 4,
|
||||
PTA_3DNOW = 1 << 5,
|
||||
PTA_3DNOW_A = 1 << 6,
|
||||
PTA_64BIT = 1 << 7,
|
||||
PTA_SSSE3 = 1 << 8,
|
||||
PTA_CX16 = 1 << 9,
|
||||
PTA_POPCNT = 1 << 10,
|
||||
PTA_ABM = 1 << 11,
|
||||
PTA_SSE4A = 1 << 12,
|
||||
PTA_NO_SAHF = 1 << 13
|
||||
} flags;
|
||||
}
|
||||
const processor_alias_table[] =
|
||||
|
@ -1923,7 +1929,8 @@ override_options (void)
|
|||
{"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
|
||||
| PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
|
||||
| PTA_MMX | PTA_PREFETCH_SSE
|
||||
| PTA_CX16 | PTA_NO_SAHF},
|
||||
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
|
||||
| PTA_64BIT | PTA_MMX
|
||||
| PTA_PREFETCH_SSE | PTA_CX16},
|
||||
|
@ -1943,7 +1950,7 @@ override_options (void)
|
|||
{"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
|
||||
| PTA_3DNOW_A | PTA_SSE},
|
||||
{"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
|
||||
| PTA_SSE | PTA_SSE2 },
|
||||
| PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
|
||||
{"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
|
||||
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
|
||||
{"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
|
||||
|
@ -2146,6 +2153,8 @@ override_options (void)
|
|||
if (processor_alias_table[i].flags & PTA_SSE4A
|
||||
&& !(target_flags_explicit & MASK_SSE4A))
|
||||
target_flags |= MASK_SSE4A;
|
||||
if (!(processor_alias_table[i].flags & PTA_NO_SAHF))
|
||||
x86_sahf = true;
|
||||
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
|
||||
error ("CPU you selected does not support x86-64 "
|
||||
"instruction set");
|
||||
|
@ -2465,10 +2474,6 @@ override_options (void)
|
|||
if (TARGET_SSE)
|
||||
TARGET_CMOVE = 1;
|
||||
|
||||
/* ??? Any idea why this is unconditionally disabled for 64-bit? */
|
||||
if (TARGET_64BIT)
|
||||
TARGET_USE_SAHF = 0;
|
||||
|
||||
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
|
||||
{
|
||||
char *p;
|
||||
|
@ -10976,7 +10981,7 @@ ix86_fp_comparison_sahf_cost (enum rtx_code code)
|
|||
enum rtx_code bypass_code, first_code, second_code;
|
||||
/* Return arbitrarily high cost when instruction is not preferred - this
|
||||
prevents gcc from using it. */
|
||||
if (!TARGET_USE_SAHF && !optimize_size)
|
||||
if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
|
||||
return 1024;
|
||||
ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
||||
return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
|
||||
|
@ -11023,7 +11028,8 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
|||
ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
||||
|
||||
/* Do fcomi/sahf based test when profitable. */
|
||||
if ((bypass_code == UNKNOWN || bypass_test)
|
||||
if ((TARGET_CMOVE || TARGET_SAHF)
|
||||
&& (bypass_code == UNKNOWN || bypass_test)
|
||||
&& (second_code == UNKNOWN || second_test)
|
||||
&& ix86_fp_comparison_arithmetics_cost (code) > cost)
|
||||
{
|
||||
|
@ -21007,7 +21013,7 @@ ix86_emit_fp_unordered_jump (rtx label)
|
|||
|
||||
emit_insn (gen_x86_fnstsw_1 (reg));
|
||||
|
||||
if (TARGET_USE_SAHF)
|
||||
if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
|
||||
{
|
||||
emit_insn (gen_x86_sahf_1 (reg));
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ enum ix86_tune_indices {
|
|||
X86_TUNE_DEEP_BRANCH_PREDICTION,
|
||||
X86_TUNE_BRANCH_PREDICTION_HINTS,
|
||||
X86_TUNE_DOUBLE_WITH_ADD,
|
||||
X86_TUNE_USE_SAHF, /* && !TARGET_64BIT */
|
||||
X86_TUNE_USE_SAHF,
|
||||
X86_TUNE_MOVX,
|
||||
X86_TUNE_PARTIAL_REG_STALL,
|
||||
X86_TUNE_PARTIAL_FLAG_REG_STALL,
|
||||
|
@ -330,6 +330,9 @@ extern int x86_prefetch_sse;
|
|||
extern int x86_cmpxchg16b;
|
||||
#define TARGET_CMPXCHG16B x86_cmpxchg16b
|
||||
|
||||
extern int x86_sahf;
|
||||
#define TARGET_SAHF x86_sahf
|
||||
|
||||
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
|
||||
|
||||
#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
|
||||
|
|
|
@ -983,8 +983,9 @@
|
|||
|
||||
(define_insn "x86_sahf_1"
|
||||
[(set (reg:CC FLAGS_REG)
|
||||
(unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))]
|
||||
"!TARGET_64BIT"
|
||||
(unspec:CC [(match_operand:HI 0 "register_operand" "a")]
|
||||
UNSPEC_SAHF))]
|
||||
"TARGET_SAHF"
|
||||
"sahf"
|
||||
[(set_attr "length" "1")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
|
|
Loading…
Add table
Reference in a new issue