ChangeLog entry:

        * coretypes.h (MEMMODEL_MASK): New.
        * builtins.c (get_memmodel): Add val. Call target.memmodel_check
        and return new variable.
        (expand_builtin_atomic_exchange):  Mask memmodel values.
        (expand_builtin_atomic_compare_exchange): Ditto.
        (expand_builtin_atomic_load): Ditto.
        (expand_builtin_atomic_store): Ditto.
        (expand_builtin_atomic_clear): Ditto.
        * doc/extend.texi: Mention port-dependent memory model flags.
        * config/i386/cpuid.h (bit_HLE): New.
        * config/i386/driver-i386.c (host_detect_local_cpu): Detect
        HLE support.
        * config/i386/i386-protos.h (ix86_generate_hle_prefix): New.
        * config/i386/i386-c.c (ix86_target_macros_internal): Set
        HLE defines.
        (ix86_target_string)<-mhle>: New.
        (ix86_valid_target_attribute_inner_p)<OPT_mhle>: Ditto.
        * config/i386/i386.c (ix86_target_string)<OPTION_MASK_ISA_HLE>:
        New.
        (ix86_valid_target_attribute_inner_p)<OPT_mhle>: Ditto.
        (ix86_option_override_internal)<PTA_HLE>: New switch; enable it
        for generic, generic64 and core-avx2.
        (ix86_print_operand): Generate HLE lock prefixes.
        (ix86_memmodel_check): New.
        (TARGET_MEMMODEL_CHECK): Ditto.
        * config/i386/i386.h (OPTION_ISA_HLE): Ditto.
        (IX86_HLE_ACQUIRE): Ditto.
        (IX86_HLE_RELEASE): Ditto.
        * config/i386/i386.h (ix86_generate_hle_prefix): Ditto.
        * config/i386/i386.opt (mhle): Ditto.
        * config/i386/sync.md (atomic_compare_and_swap<mode>): Pass
        success model to instruction emitter.
        (atomic_fetch_add<mode>): Ditto.
        (atomic_exchange<mode>): Ditto.
        (atomic_add<mode>): Ditto.
        (atomic_sub<mode>): Ditto.
        (atomic_<code><mode>): Ditto.
        (*atomic_compare_and_swap_doubledi_pic): Ditto.
        (atomic_compare_and_swap_single<mode>): Define and use argument
        for success model.
        (atomic_compare_and_swap_double<mode>): Ditto.
        * configure.ac: Check if assembler supports HLE prefixes.
        * configure: Regenerate.
        * config.in: Ditto.

testsuite/ChangeLog entry:

        * gcc.target/i386/hle-cmpxchg-acq-1.c: New.
        * gcc.target/i386/hle-cmpxchg-rel-1.c: Ditto.
        * gcc.target/i386/hle-add-acq-1.c: Ditto.
        * gcc.target/i386/hle-add-rel-1.c: Ditto.
        * gcc.target/i386/hle-and-acq-1.c: Ditto.
        * gcc.target/i386/hle-and-rel-1.c: Ditto.
        * gcc.target/i386/hle-or-acq-1.c: Ditto.
        * gcc.target/i386/hle-or-rel-1.c: Ditto.
        * gcc.target/i386/hle-sub-acq-1.c: Ditto.
        * gcc.target/i386/hle-sub-rel-1.c: Ditto.
        * gcc.target/i386/hle-xadd-acq-1.c: Ditto.
        * gcc.target/i386/hle-xadd-rel-1.c: Ditto.
        * gcc.target/i386/hle-xchg-acq-1.c: Ditto.
        * gcc.target/i386/hle-xchg-rel-1.c: Ditto.
        * gcc.target/i386/hle-xor-acq-1.c: Ditto.
        * gcc.target/i386/hle-xor-rel-1.c: Ditto.



Co-Authored-By: Andi Kleen <ak@linux.intel.com>

From-SVN: r187051
Kirill Yukhin 2012-05-02 15:32:01 +00:00 committed by Kirill Yukhin
parent 68e7284038
commit 5dcfdccd32
35 changed files with 415 additions and 37 deletions


@ -1,3 +1,51 @@
2012-05-02 Kirill Yukhin <kirill.yukhin@intel.com>
Andi Kleen <ak@linux.intel.com>
* coretypes (MEMMODEL_MASK): New.
* builtins.c (get_memmodel): Add val. Call target.memmodel_check
and return new variable.
(expand_builtin_atomic_exchange): Mask memmodel values.
(expand_builtin_atomic_compare_exchange): Ditto.
(expand_builtin_atomic_load): Ditto.
(expand_builtin_atomic_store): Ditto.
(expand_builtin_atomic_clear): Ditto.
* doc/extend.texi: Mention port-dependent memory model flags.
* config/i386/cpuid.h (bit_HLE): New.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect
HLE support.
* config/i386/i386-protos.h (ix86_generate_hle_prefix): New.
* config/i386/i386-c.c (ix86_target_macros_internal): Set
HLE defines.
(ix86_target_string)<-mhle>: New.
(ix86_valid_target_attribute_inner_p)<OPT_mhle>: Ditto.
* config/i386/i386.c (ix86_target_string)<OPTION_MASK_ISA_HLE>:
New.
(ix86_valid_target_attribute_inner_p)<OPT_mhle>: Ditto.
(ix86_option_override_internal)<PTA_HLE>: New switch, set it
enabled for generic, generic64 and core-avx2.
(ix86_print_operand): Generate HLE lock prefixes.
(ix86_memmodel_check): New.
(TARGET_MEMMODEL_CHECK): Ditto.
* config/i386/i386.h (OPTION_ISA_HLE): Ditto.
(IX86_HLE_ACQUIRE): Ditto.
(IX86_HLE_RELEASE): Ditto.
* config/i386/i386.h (ix86_generate_hle_prefix): Ditto.
* config/i386/i386.opt (mhle): Ditto.
* config/i386/sync.md(atomic_compare_and_swap<mode>): Pass
success model to instruction emitter.
(atomic_fetch_add<mode>): Ditto.
(atomic_exchange<mode>): Ditto.
(atomic_add<mode>): Ditto.
(atomic_sub<mode>): Ditto.
(atomic_<code><mode>): Ditto.
(*atomic_compare_and_swap_doubledi_pic): Ditto.
(atomic_compare_and_swap_single<mode>): Define and use argument
for success model.
(atomic_compare_and_swap_double<mode>): Ditto.
* configure.ac: Check if assembler support HLE prefixes.
* configure: Regenerate.
* config.in: Ditto.
2012-05-02 Steven Bosscher <steven@gcc.gnu.org>
PR middle-end/53153


@ -5338,6 +5338,7 @@ static enum memmodel
get_memmodel (tree exp)
{
rtx op;
unsigned HOST_WIDE_INT val;
/* If the parameter is not a constant, it's a run time value so we'll just
convert it to MEMMODEL_SEQ_CST to avoid annoying runtime checking. */
@ -5345,13 +5346,25 @@ get_memmodel (tree exp)
return MEMMODEL_SEQ_CST;
op = expand_normal (exp);
if (INTVAL (op) < 0 || INTVAL (op) >= MEMMODEL_LAST)
val = INTVAL (op);
if (targetm.memmodel_check)
val = targetm.memmodel_check (val);
else if (val & ~MEMMODEL_MASK)
{
warning (OPT_Winvalid_memory_model,
"Unknown architecture specifier in memory model to builtin.");
return MEMMODEL_SEQ_CST;
}
if ((INTVAL(op) & MEMMODEL_MASK) >= MEMMODEL_LAST)
{
warning (OPT_Winvalid_memory_model,
"invalid memory model argument to builtin");
return MEMMODEL_SEQ_CST;
}
return (enum memmodel) INTVAL (op);
return (enum memmodel) val;
}
/* Expand the __atomic_exchange intrinsic:
@ -5366,7 +5379,7 @@ expand_builtin_atomic_exchange (enum machine_mode mode, tree exp, rtx target)
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 2));
if (model == MEMMODEL_CONSUME)
if ((model & MEMMODEL_MASK) == MEMMODEL_CONSUME)
{
error ("invalid memory model for %<__atomic_exchange%>");
return NULL_RTX;
@ -5402,7 +5415,8 @@ expand_builtin_atomic_compare_exchange (enum machine_mode mode, tree exp,
success = get_memmodel (CALL_EXPR_ARG (exp, 4));
failure = get_memmodel (CALL_EXPR_ARG (exp, 5));
if (failure == MEMMODEL_RELEASE || failure == MEMMODEL_ACQ_REL)
if ((failure & MEMMODEL_MASK) == MEMMODEL_RELEASE
|| (failure & MEMMODEL_MASK) == MEMMODEL_ACQ_REL)
{
error ("invalid failure memory model for %<__atomic_compare_exchange%>");
return NULL_RTX;
@ -5453,8 +5467,8 @@ expand_builtin_atomic_load (enum machine_mode mode, tree exp, rtx target)
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 1));
if (model == MEMMODEL_RELEASE
|| model == MEMMODEL_ACQ_REL)
if ((model & MEMMODEL_MASK) == MEMMODEL_RELEASE
|| (model & MEMMODEL_MASK) == MEMMODEL_ACQ_REL)
{
error ("invalid memory model for %<__atomic_load%>");
return NULL_RTX;
@ -5482,9 +5496,9 @@ expand_builtin_atomic_store (enum machine_mode mode, tree exp)
enum memmodel model;
model = get_memmodel (CALL_EXPR_ARG (exp, 2));
if (model != MEMMODEL_RELAXED
&& model != MEMMODEL_SEQ_CST
&& model != MEMMODEL_RELEASE)
if ((model & MEMMODEL_MASK) != MEMMODEL_RELAXED
&& (model & MEMMODEL_MASK) != MEMMODEL_SEQ_CST
&& (model & MEMMODEL_MASK) != MEMMODEL_RELEASE)
{
error ("invalid memory model for %<__atomic_store%>");
return NULL_RTX;
@ -5590,7 +5604,8 @@ expand_builtin_atomic_clear (tree exp)
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
model = get_memmodel (CALL_EXPR_ARG (exp, 1));
if (model == MEMMODEL_ACQUIRE || model == MEMMODEL_ACQ_REL)
if ((model & MEMMODEL_MASK) == MEMMODEL_ACQUIRE
|| (model & MEMMODEL_MASK) == MEMMODEL_ACQ_REL)
{
error ("invalid memory model for %<__atomic_store%>");
return const0_rtx;
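
To illustrate the masking above, a small standalone sketch (not GCC source; the constants
mirror MEMMODEL_MASK from coretypes.h and the i386 IX86_HLE_ACQUIRE bit added later in this
patch, and the usual value __ATOMIC_ACQUIRE == 2 is assumed):

#include <stdio.h>

#define MEMMODEL_MASK   ((1 << 16) - 1)   /* as in coretypes.h below      */
#define HLE_ACQUIRE_BIT (1 << 16)         /* mirrors IX86_HLE_ACQUIRE      */

int
main (void)
{
  /* A combined argument such as __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE.  */
  unsigned int val = 2 | HLE_ACQUIRE_BIT;

  /* The checks above compare only the low bits; the upper bits are left
     to the target hook.  */
  printf ("standard model %u, target bits %#x\n",
          val & MEMMODEL_MASK, val & ~MEMMODEL_MASK);
  return 0;
}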


@ -350,6 +350,11 @@
#undef HAVE_AS_IX86_SAHF
#endif
/* Define if your assembler supports HLE prefixes. */
#ifndef USED_FOR_TARGET
#undef HAVE_AS_IX86_HLE
#endif
/* Define if your assembler supports the swap suffix. */
#ifndef USED_FOR_TARGET


@ -66,6 +66,7 @@
/* Extended Features (%eax == 7) */
#define bit_FSGSBASE (1 << 0)
#define bit_BMI (1 << 3)
#define bit_HLE (1 << 4)
#define bit_AVX2 (1 << 5)
#define bit_BMI2 (1 << 8)
#define bit_RTM (1 << 11)


@ -397,6 +397,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
unsigned int has_hle = 0;
bool arch;
@ -456,6 +457,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
__cpuid_count (7, 0, eax, ebx, ecx, edx);
has_bmi = ebx & bit_BMI;
has_hle = ebx & bit_HLE;
has_avx2 = ebx & bit_AVX2;
has_bmi2 = ebx & bit_BMI2;
}
@ -726,10 +728,12 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
const char *hle = has_hle ? " -mhle" : "-mno-hle";
options = concat (options, cx16, sahf, movbe, ase, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL);
tbm, avx, avx2, sse4_2, sse4_1, lzcnt,
hle, NULL);
}
done:
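
For reference, a standalone sketch of the same detection outside the driver, assuming GCC's
<cpuid.h> (CPUID leaf 7, subleaf 0, EBX bit 4 corresponds to the bit_HLE added above):

#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Leaf 7 is only valid if the maximum basic leaf is at least 7.  */
  if (__get_cpuid_max (0, 0) >= 7)
    {
      __cpuid_count (7, 0, eax, ebx, ecx, edx);
      printf ("HLE: %s\n", (ebx & (1 << 4)) ? "yes" : "no");
    }
  return 0;
}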


@ -54,6 +54,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
size_t tune_len = strlen (ix86_tune_string);
int last_arch_char = ix86_arch_string[arch_len - 1];
int last_tune_char = ix86_tune_string[tune_len - 1];
char hle_macro[64];
/* Built-ins based on -march=. */
switch (arch)
@ -293,6 +294,12 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
def_or_undef (parse_in, "__SSE2_MATH__");
sprintf (hle_macro, "__ATOMIC_HLE_ACQUIRE=%d", IX86_HLE_ACQUIRE);
def_or_undef (parse_in, hle_macro);
sprintf (hle_macro, "__ATOMIC_HLE_RELEASE=%d", IX86_HLE_RELEASE);
def_or_undef (parse_in, hle_macro);
}
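
Sketch (not part of the patch): user code can key off the new predefines added above and
fall back gracefully where they are absent, e.g. on non-x86 targets:

/* __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE come from the block above;
   treat them as zero elsewhere so the same source stays portable.  */
#ifndef __ATOMIC_HLE_ACQUIRE
# define __ATOMIC_HLE_ACQUIRE 0
#endif

static inline int
locked_inc (int *p)
{
  return __atomic_fetch_add (p, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}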


@ -2679,6 +2679,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mbmi", OPTION_MASK_ISA_BMI },
{ "-mbmi2", OPTION_MASK_ISA_BMI2 },
{ "-mlzcnt", OPTION_MASK_ISA_LZCNT },
{ "-mhle", OPTION_MASK_ISA_HLE },
{ "-mtbm", OPTION_MASK_ISA_TBM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
@ -2954,6 +2955,7 @@ ix86_option_override_internal (bool main_args_p)
#define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
#define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
#define PTA_RTM (HOST_WIDE_INT_1 << 32)
#define PTA_HLE (HOST_WIDE_INT_1 << 33)
/* if this reaches 64, need to widen struct pta flags below */
static struct pta
@ -3012,7 +3014,7 @@ ix86_option_override_internal (bool main_args_p)
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
| PTA_FMA | PTA_MOVBE | PTA_RTM},
| PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
@ -3075,9 +3077,10 @@ ix86_option_override_internal (bool main_args_p)
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
{"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch. */ },
PTA_HLE /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
PTA_64BIT /* flags are only used for -march switch. */ },
PTA_64BIT
| PTA_HLE /* flags are only used for -march switch. */ },
};
/* -mrecip options. */
@ -3430,6 +3433,9 @@ ix86_option_override_internal (bool main_args_p)
if (processor_alias_table[i].flags & PTA_RTM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
ix86_isa_flags |= OPTION_MASK_ISA_RTM;
if (processor_alias_table[i].flags & PTA_HLE
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
ix86_isa_flags |= OPTION_MASK_ISA_HLE;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
@ -4251,6 +4257,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
IX86_ATTR_ISA ("f16c", OPT_mf16c),
IX86_ATTR_ISA ("rtm", OPT_mrtm),
IX86_ATTR_ISA ("hle", OPT_mhle),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@ -14340,6 +14347,24 @@ ix86_print_operand (FILE *file, rtx x, int code)
x = adjust_address_nv (x, DImode, 8);
break;
case 'K':
gcc_assert (CONST_INT_P (x));
if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
fputs ("xacquire ", file);
#else
fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
fputs ("xrelease ", file);
#else
fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
/* We do not want to print value of the operand. */
return;
case '+':
{
rtx x;
@ -39302,6 +39327,38 @@ ix86_autovectorize_vector_sizes (void)
return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Validate target specific memory model bits in VAL. */
static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
unsigned HOST_WIDE_INT strong;
if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
|MEMMODEL_MASK)
|| ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
{
warning (OPT_Winvalid_memory_model,
"Unknown architecture specific memory model");
return MEMMODEL_SEQ_CST;
}
strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
{
warning (OPT_Winvalid_memory_model,
"HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
}
if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
{
warning (OPT_Winvalid_memory_model,
"HLE_RELEASE not used with RELEASE or stronger memory model");
return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
}
return val;
}
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@ -39401,6 +39458,9 @@ ix86_autovectorize_vector_sizes (void)
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
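
Concretely, ix86_memmodel_check above rejects inconsistent combinations; a hypothetical
example (not from the patch) that would hit the HLE_ACQUIRE branch, emit the
-Winvalid-memory-model warning and be downgraded to MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE:

void
bad_hle_combo (int *p, int v)
{
  /* HLE_ACQUIRE paired with a model weaker than ACQUIRE triggers the
     warning in the hook above.  */
  __atomic_fetch_add (p, v, __ATOMIC_RELAXED | __ATOMIC_HLE_ACQUIRE);
}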


@ -75,6 +75,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_RDRND OPTION_ISA_RDRND
#define TARGET_F16C OPTION_ISA_F16C
#define TARGET_RTM OPTION_ISA_RTM
#define TARGET_HLE OPTION_ISA_HLE
#define TARGET_LP64 OPTION_ABI_64
#define TARGET_X32 OPTION_ABI_X32
@ -2344,6 +2345,9 @@ extern void debug_dispatch_window (int);
#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0)
#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0)
#define IX86_HLE_ACQUIRE (1 << 16)
#define IX86_HLE_RELEASE (1 << 17)
/*
Local variables:
version-control: t


@ -58,6 +58,7 @@
;; X -- don't print any sort of PIC '@' suffix for a symbol.
;; & -- print some in-use local-dynamic symbol name.
;; H -- print a memory address offset by 8; used for sse high-parts
;; K -- print HLE lock prefix
;; Y -- print condition for XOP pcom* instruction.
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).


@ -528,6 +528,10 @@ mlzcnt
Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save
Support LZCNT built-in function and code generation
mhle
Target Report Mask(ISA_HLE) Var(ix86_isa_flags) Save
Support Hardware Lock Elision prefixes
mtbm
Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save
Support TBM built-in functions and code generation


@ -315,8 +315,9 @@
(match_operand:SI 7 "const_int_operand")] ;; failure model
"TARGET_CMPXCHG"
{
emit_insn (gen_atomic_compare_and_swap_single<mode>
(operands[1], operands[2], operands[3], operands[4]));
emit_insn
(gen_atomic_compare_and_swap_single<mode>
(operands[1], operands[2], operands[3], operands[4], operands[6]));
ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx);
DONE;
@ -344,8 +345,9 @@
{
if (<MODE>mode == DImode && TARGET_64BIT)
{
emit_insn (gen_atomic_compare_and_swap_singledi
(operands[1], operands[2], operands[3], operands[4]));
emit_insn
(gen_atomic_compare_and_swap_singledi
(operands[1], operands[2], operands[3], operands[4], operands[6]));
}
else
{
@ -370,7 +372,7 @@
mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
emit_insn (gen_atomic_compare_and_swap_double<mode>
(lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
(lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n, operands[6]));
}
ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx);
@ -382,14 +384,15 @@
(unspec_volatile:SWI
[(match_operand:SWI 1 "memory_operand" "+m")
(match_operand:SWI 2 "register_operand" "0")
(match_operand:SWI 3 "register_operand" "<r>")]
(match_operand:SWI 3 "register_operand" "<r>")
(match_operand:SI 4 "const_int_operand")]
UNSPECV_CMPXCHG_1))
(set (match_dup 1)
(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
"TARGET_CMPXCHG"
"lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
"lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
;; For double-word compare and swap, we are obliged to play tricks with
;; the input newval (op5:op6) because the Intel register numbering does
@ -403,7 +406,8 @@
(match_operand:<DCASHMODE> 3 "register_operand" "0")
(match_operand:<DCASHMODE> 4 "register_operand" "1")
(match_operand:<DCASHMODE> 5 "register_operand" "b")
(match_operand:<DCASHMODE> 6 "register_operand" "c")]
(match_operand:<DCASHMODE> 6 "register_operand" "c")
(match_operand:SI 7 "const_int_operand")]
UNSPECV_CMPXCHG_1))
(set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
@ -412,7 +416,7 @@
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
""
"lock{%;} cmpxchg<doublemodesuffix>b\t%2")
"lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2")
;; Theoretically we'd like to use constraint "r" (any reg) for op5,
;; but that includes ecx. If op5 and op6 are the same (like when
@ -430,7 +434,8 @@
(match_operand:SI 3 "register_operand" "0")
(match_operand:SI 4 "register_operand" "1")
(match_operand:SI 5 "register_operand" "SD")
(match_operand:SI 6 "register_operand" "c")]
(match_operand:SI 6 "register_operand" "c")
(match_operand:SI 7 "const_int_operand")]
UNSPECV_CMPXCHG_1))
(set (match_operand:SI 1 "register_operand" "=d")
(unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
@ -439,7 +444,7 @@
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
"!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
"xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
"xchg{l}\t%%ebx, %5\;lock{%;} %K7cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
;; For operand 2 nonmemory_operand predicate is used instead of
;; register_operand to allow combiner to better optimize atomic
@ -455,7 +460,7 @@
(match_operand:SWI 2 "nonmemory_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_XADD"
"lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
"lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
;; This peephole2 and following insn optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
@ -526,7 +531,7 @@
(set (match_dup 1)
(match_operand:SWI 2 "register_operand" "0"))] ;; input
""
"xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
"%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
(define_insn "atomic_add<mode>"
[(set (match_operand:SWI 0 "memory_operand" "+m")
@ -541,15 +546,15 @@
if (TARGET_USE_INCDEC)
{
if (operands[1] == const1_rtx)
return "lock{%;} inc{<imodesuffix>}\t%0";
return "lock{%;} %K2inc{<imodesuffix>}\t%0";
if (operands[1] == constm1_rtx)
return "lock{%;} dec{<imodesuffix>}\t%0";
return "lock{%;} %K2dec{<imodesuffix>}\t%0";
}
if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
(define_insn "atomic_sub<mode>"
@ -565,15 +570,15 @@
if (TARGET_USE_INCDEC)
{
if (operands[1] == const1_rtx)
return "lock{%;} dec{<imodesuffix>}\t%0";
return "lock{%;} %K2dec{<imodesuffix>}\t%0";
if (operands[1] == constm1_rtx)
return "lock{%;} inc{<imodesuffix>}\t%0";
return "lock{%;} %K2inc{<imodesuffix>}\t%0";
}
if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})
(define_insn "atomic_<logic><mode>"
@ -585,4 +590,4 @@
UNSPECV_LOCK))
(clobber (reg:CC FLAGS_REG))]
""
"lock{%;} <logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
"lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")

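The %K modifiers added above are what ultimately emit the prefix; a sketch of the observable
effect (compiled with -mhle -O2 on x86-64, per the new scan-assembler tests below), where the
locked add is expected to carry an xacquire prefix, or the equivalent .byte 0xf2 when the
assembler lacks HLE support:

void
hle_add_acq (int *p, int v)
{
  /* Expected assembly is roughly:  lock xacquire add{l} %esi, (%rdi)  */
  __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}
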
gcc/configure

@ -24626,6 +24626,39 @@ if test $gcc_cv_as_ix86_sahf = yes; then
$as_echo "#define HAVE_AS_IX86_SAHF 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for hle prefixes" >&5
$as_echo_n "checking assembler for hle prefixes... " >&6; }
if test "${gcc_cv_as_ix86_hle+set}" = set; then :
$as_echo_n "(cached) " >&6
else
gcc_cv_as_ix86_hle=no
if test x$gcc_cv_as != x; then
$as_echo '.code64
lock xacquire cmpxchg %esi, (%rcx)
' > conftest.s
if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }
then
gcc_cv_as_ix86_hle=yes
else
echo "configure: failed program was" >&5
cat conftest.s >&5
fi
rm -f conftest.o conftest.s
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_ix86_hle" >&5
$as_echo "$gcc_cv_as_ix86_hle" >&6; }
if test $gcc_cv_as_ix86_hle = yes; then
$as_echo "#define HAVE_AS_IX86_HLE 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for swap suffix" >&5


@ -3597,6 +3597,14 @@ foo: nop
[AC_DEFINE(HAVE_AS_IX86_SAHF, 1,
[Define if your assembler supports the sahf mnemonic in 64bit mode.])])
gcc_GAS_CHECK_FEATURE([hle prefixes],
gcc_cv_as_ix86_hle,,,
[.code64
lock xacquire cmpxchg %esi, (%rcx)
],,
[AC_DEFINE(HAVE_AS_IX86_HLE, 1,
[Define if your assembler supports HLE prefixes.])])
gcc_GAS_CHECK_FEATURE([swap suffix],
gcc_cv_as_ix86_swap,,,
[movl.s %esp, %ebp],,


@ -194,5 +194,8 @@ enum memmodel
MEMMODEL_LAST = 6
};
/* Suppose that higher bits are target dependant. */
#define MEMMODEL_MASK ((1<<16)-1)
#endif /* coretypes.h */


@ -7093,7 +7093,8 @@ to the same names in the C++11 standard. Refer there or to the
atomic synchronization} for more detailed definitions. These memory
models integrate both barriers to code motion as well as synchronization
requirements with other threads. These are listed in approximately
ascending order of strength.
ascending order of strength. It is also possible to use target specific
flags for memory model flags, like Hardware Lock Elision.
@table @code
@item __ATOMIC_RELAXED
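
For example, a usage sketch of the target-specific flags mentioned in the new sentence above
(based on the tests added below; requires -mhle on x86):

int
swap_release (int *p, int v)
{
  return __atomic_exchange_n (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}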


@ -11362,6 +11362,11 @@ MIPS, where add-immediate takes a 16-bit signed value,
@code{TARGET_CONST_ANCHOR} is set to @samp{0x8000}. The default value
is zero, which disables this optimization. @end deftypevr
@deftypefn {Target Hook} {unsigned HOST_WIDE_INT} TARGET_MEMMODEL_CHECK (unsigned HOST_WIDE_INT @var{val})
Validate target specific memory model mask bits. When NULL no target specific
memory model bits are allowed.
@end deftypefn
@deftypevr {Target Hook} {unsigned char} TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
This value should be set if the result written by @code{atomic_test_and_set} is not exactly 1, i.e. the @code{bool} @code{true}.
@end deftypevr


@ -11242,4 +11242,9 @@ MIPS, where add-immediate takes a 16-bit signed value,
@code{TARGET_CONST_ANCHOR} is set to @samp{0x8000}. The default value
is zero, which disables this optimization. @end deftypevr
@hook TARGET_MEMMODEL_CHECK
Validate target specific memory model mask bits. When NULL no target specific
memory model bits are allowed.
@end deftypefn
@hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL


@ -1940,6 +1940,12 @@ DEFHOOKPOD
"",
unsigned HOST_WIDE_INT, 0)
/* Defines, which target-dependent bits (upper 16) are used by port */
DEFHOOK
(memmodel_check,
"",
unsigned HOST_WIDE_INT, (unsigned HOST_WIDE_INT val), NULL)
/* Functions relating to calls - argument passing, returns, etc. */
/* Members of struct call have no special macro prefix. */
HOOK_VECTOR (TARGET_CALLS, calls)
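
A port opts in by defining the corresponding target macro to its checker, exactly as the
i386 changes above do:

/* In the port's backend (i386.c in this patch):  */
#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check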


@ -1,3 +1,22 @@
2012-05-02 Kirill Yukhin <kirill.yukhin@intel.com>
* gcc.target/i386/hle-cmpxchg-acq-1.c: New.
* gcc.target/i386/hle-cmpxchg-rel-1.c: Ditto.
* gcc.target/i386/hle-add-acq-1.c: Ditto.
* gcc.target/i386/hle-add-rel-1.c: Ditto.
* gcc.target/i386/hle-and-acq-1.c: Ditto.
* gcc.target/i386/hle-and-rel-1.c: Ditto.
* gcc.target/i386/hle-or-acq-1.c: Ditto.
* gcc.target/i386/hle-or-rel-1.c: Ditto.
* gcc.target/i386/hle-sub-acq-1.c: Ditto.
* gcc.target/i386/hle-sub-rel-1.c: Ditto.
* gcc.target/i386/hle-xadd-acq-1.c: Ditto.
* gcc.target/i386/hle-xadd-rel-1.c: Ditto.
* gcc.target/i386/hle-xchg-acq-1.c: Ditto.
* gcc.target/i386/hle-xchg-rel-1.c: Ditto.
* gcc.target/i386/hle-xor-acq-1.c: Ditto.
* gcc.target/i386/hle-xor-rel-1.c: Ditto.
2012-05-02 Steven Bosscher <steven@gcc.gnu.org>
PR middle-end/53153


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+add" } } */
void
hle_add (int *p, int v)
{
__atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+add" } } */
void
hle_add (int *p, int v)
{
__atomic_fetch_add (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+and" } } */
void
hle_and (int *p, int v)
{
__atomic_fetch_and (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+and" } } */
void
hle_and (int *p, int v)
{
__atomic_fetch_and (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+cmpxchg" } } */
int
hle_cmpxchg (int *p, int oldv, int newv)
{
return __atomic_compare_exchange_n (p, &oldv, newv, 0, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE, __ATOMIC_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+cmpxchg" } } */
int
hle_cmpxchg (int *p, int oldv, int newv)
{
return __atomic_compare_exchange_n (p, &oldv, newv, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE, __ATOMIC_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+or" } } */
void
hle_or (int *p, int v)
{
__atomic_or_fetch (p, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+or" } } */
void
hle_xor (int *p, int v)
{
__atomic_fetch_or (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+sub" } } */
void
hle_sub (int *p, int v)
{
__atomic_fetch_sub (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+sub" } } */
void
hle_sub (int *p, int v)
{
__atomic_fetch_sub (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xadd" } } */
int
hle_xadd (int *p, int v)
{
return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xadd" } } */
int
hle_xadd (int *p, int v)
{
return __atomic_fetch_add (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xchg" } } */
int
hle_xchg (int *p, int v)
{
return __atomic_exchange_n (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xchg" } } */
int
hle_xchg (int *p, int v)
{
return __atomic_exchange_n (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xor" } } */
void
hle_xor (int *p, int v)
{
__atomic_fetch_xor (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}


@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-mhle" } */
/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+xor" } } */
void
hle_xor (int *p, int v)
{
__atomic_fetch_xor (p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}