Add custom HTM fast path for RTM on x86_64.
* libitm_i.h (gtm_thread): Assign an asm name to serial_lock. (htm_fastpath): Assign an asm name. * libitm.h (_ITM_codeProperties): Add non-ABI flags used by custom HTM fast paths. (_ITM_actions): Likewise. * config/x86/target.h (HTM_CUSTOM_FASTPATH): Enable custom fastpath on x86_64. * config/x86/sjlj.S (_ITM_beginTransaction): Add custom HTM fast path. * config/posix/rwlock.h (gtm_rwlock): Update comments. Move summary field to the start of the structure. * config/linux/rwlock.h (gtm_rwlock): Update comments. * beginend.cc (gtm_thread::begin_transaction): Add retry policy handling for custom HTM fast paths. From-SVN: r202101
This commit is contained in:
parent
8595a07d8d
commit
bec9ec3fc1
8 changed files with 198 additions and 31 deletions
|
@ -1,3 +1,19 @@
|
|||
2013-08-30 Torvald Riegel <triegel@redhat.com>
|
||||
|
||||
* libitm_i.h (gtm_thread): Assign an asm name to serial_lock.
|
||||
(htm_fastpath): Assign an asm name.
|
||||
* libitm.h (_ITM_codeProperties): Add non-ABI flags used by custom
|
||||
HTM fast paths.
|
||||
(_ITM_actions): Likewise.
|
||||
* config/x86/target.h (HTM_CUSTOM_FASTPATH): Enable custom fastpath on
|
||||
x86_64.
|
||||
* config/x86/sjlj.S (_ITM_beginTransaction): Add custom HTM fast path.
|
||||
* config/posix/rwlock.h (gtm_rwlock): Update comments. Move summary
|
||||
field to the start of the structure.
|
||||
* config/linux/rwlock.h (gtm_rwlock): Update comments.
|
||||
* beginend.cc (gtm_thread::begin_transaction): Add retry policy
|
||||
handling for custom HTM fast paths.
|
||||
|
||||
2013-08-14 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
|
||||
Revert:
|
||||
2013-08-02 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
|
||||
|
|
|
@ -165,7 +165,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|||
if (unlikely(prop & pr_undoLogCode))
|
||||
GTM_fatal("pr_undoLogCode not supported");
|
||||
|
||||
#if defined(USE_HTM_FASTPATH) && !defined(HTM_CUSTOM_FASTPATH)
|
||||
#ifdef USE_HTM_FASTPATH
|
||||
// HTM fastpath. Only chosen in the absence of transaction_cancel to allow
|
||||
// using an uninstrumented code path.
|
||||
// The fastpath is enabled only by dispatch_htm's method group, which uses
|
||||
|
@ -187,6 +187,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|||
// indeed in serial mode, and HW transactions should never need serial mode
|
||||
// for any internal changes (e.g., they never abort visibly to the STM code
|
||||
// and thus do not trigger the standard retry handling).
|
||||
#ifndef HTM_CUSTOM_FASTPATH
|
||||
if (likely(htm_fastpath && (prop & pr_hasNoAbort)))
|
||||
{
|
||||
for (uint32_t t = htm_fastpath; t; t--)
|
||||
|
@ -237,6 +238,49 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
// If we have a custom HTM fastpath in ITM_beginTransaction, we implement
|
||||
// just the retry policy here. We communicate with the custom fastpath
|
||||
// through additional property bits and return codes, and either transfer
|
||||
// control back to the custom fastpath or run the fallback mechanism. The
|
||||
// fastpath synchronization algorithm itself is the same.
|
||||
// pr_HTMRetryableAbort states that a HW transaction started by the custom
|
||||
// HTM fastpath aborted, and that we thus have to decide whether to retry
|
||||
// the fastpath (returning a_tryHTMFastPath) or just proceed with the
|
||||
// fallback method.
|
||||
if (likely(htm_fastpath && (prop & pr_HTMRetryableAbort)))
|
||||
{
|
||||
tx = gtm_thr();
|
||||
if (unlikely(tx == NULL))
|
||||
{
|
||||
// See below.
|
||||
tx = new gtm_thread();
|
||||
set_gtm_thr(tx);
|
||||
}
|
||||
// If this is the first abort, reset the retry count. We abuse
|
||||
// restart_total for the retry count, which is fine because our only
|
||||
// other fallback will use serial transactions, which don't use
|
||||
// restart_total but will reset it when committing.
|
||||
if (!(prop & pr_HTMRetriedAfterAbort))
|
||||
tx->restart_total = htm_fastpath;
|
||||
|
||||
if (--tx->restart_total > 0)
|
||||
{
|
||||
// Wait until any concurrent serial-mode transactions have finished.
|
||||
// Essentially the same code as above.
|
||||
if (serial_lock.is_write_locked())
|
||||
{
|
||||
if (tx->nesting > 0)
|
||||
goto stop_custom_htm_fastpath;
|
||||
serial_lock.read_lock(tx);
|
||||
serial_lock.read_unlock(tx);
|
||||
}
|
||||
// Let ITM_beginTransaction retry the custom HTM fastpath.
|
||||
return a_tryHTMFastPath;
|
||||
}
|
||||
}
|
||||
stop_custom_htm_fastpath:
|
||||
#endif
|
||||
#endif
|
||||
|
||||
tx = gtm_thr();
|
||||
|
|
|
@ -39,6 +39,11 @@ struct gtm_thread;
|
|||
//
|
||||
// In this implementation, writers are given highest priority access but
|
||||
// read-to-write upgrades do not have a higher priority than writers.
|
||||
//
|
||||
// Do not change the layout of this class; it must remain a POD type with
|
||||
// standard layout, and the WRITERS field must be first (i.e., so the
|
||||
// assembler code can assume that its address is equal to the address of the
|
||||
// respective instance of the class).
|
||||
|
||||
class gtm_rwlock
|
||||
{
|
||||
|
|
|
@ -44,19 +44,25 @@ struct gtm_thread;
|
|||
//
|
||||
// In this implementation, writers are given highest priority access but
|
||||
// read-to-write upgrades do not have a higher priority than writers.
|
||||
//
|
||||
// Do not change the layout of this class; it must remain a POD type with
|
||||
// standard layout, and the SUMMARY field must be first (i.e., so the
|
||||
// assembler code can assume that its address is equal to the address of the
|
||||
// respective instance of the class).
|
||||
|
||||
class gtm_rwlock
|
||||
{
|
||||
pthread_mutex_t mutex; // Held if manipulating any field.
|
||||
pthread_cond_t c_readers; // Readers wait here
|
||||
pthread_cond_t c_writers; // Writers wait here for writers
|
||||
pthread_cond_t c_confirmed_writers; // Writers wait here for readers
|
||||
|
||||
static const unsigned a_writer = 1; // An active writer.
|
||||
static const unsigned w_writer = 2; // The w_writers field != 0
|
||||
static const unsigned w_reader = 4; // The w_readers field != 0
|
||||
|
||||
std::atomic<unsigned int> summary; // Bitmask of the above.
|
||||
|
||||
pthread_mutex_t mutex; // Held if manipulating any field.
|
||||
pthread_cond_t c_readers; // Readers wait here
|
||||
pthread_cond_t c_writers; // Writers wait here for writers
|
||||
pthread_cond_t c_confirmed_writers; // Writers wait here for readers
|
||||
|
||||
unsigned int a_readers; // Nr active readers as observed by a writer
|
||||
unsigned int w_readers; // Nr waiting readers
|
||||
unsigned int w_writers; // Nr waiting writers
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
|
||||
#include "asmcfi.h"
|
||||
#include "config.h"
|
||||
|
||||
#define CONCAT1(a, b) CONCAT2(a, b)
|
||||
#define CONCAT2(a, b) a ## b
|
||||
|
@ -52,6 +53,19 @@
|
|||
# endif
|
||||
#endif
|
||||
|
||||
/* These are duplicates of the canonical definitions in libitm.h. Note that
|
||||
the code relies on pr_uninstrumentedCode == a_runUninstrumentedCode. */
|
||||
#define pr_uninstrumentedCode 0x02
|
||||
#define pr_hasNoAbort 0x08
|
||||
#define pr_HTMRetryableAbort 0x800000
|
||||
#define pr_HTMRetriedAfterAbort 0x1000000
|
||||
#define a_runInstrumentedCode 0x01
|
||||
#define a_runUninstrumentedCode 0x02
|
||||
#define a_tryHTMFastPath 0x20
|
||||
|
||||
#define _XABORT_EXPLICIT (1 << 0)
|
||||
#define _XABORT_RETRY (1 << 1)
|
||||
|
||||
.text
|
||||
|
||||
.align 4
|
||||
|
@ -60,20 +74,83 @@
|
|||
SYM(_ITM_beginTransaction):
|
||||
cfi_startproc
|
||||
#ifdef __x86_64__
|
||||
#ifdef HAVE_AS_RTM
|
||||
/* Custom HTM fast path. We start the HW transaction here and let
|
||||
gtm_thread::begin_transaction (aka GTM_begin_transaction) decide
|
||||
how to proceed on aborts: We either retry the fast path, or fall
|
||||
back to another execution method. RTM restores all registers after
|
||||
a HW transaction abort, so we can do the SW setjmp after aborts,
|
||||
and we have to because we might choose a SW fallback. However,
|
||||
we have to explicitly save/restore the first argument (edi). */
|
||||
cmpl $0, SYM(gtm_htm_fastpath)(%rip)
|
||||
jz .Lno_htm
|
||||
testl $pr_hasNoAbort, %edi
|
||||
jz .Lno_htm
|
||||
.Lhtm_fastpath:
|
||||
xbegin .Ltxn_abort
|
||||
/* Monitor the serial lock (specifically, the 32-bit writer/summary field
|
||||
at its start), and only continue if there is no serial-mode
|
||||
transaction. Note that we might be just a nested transaction and
|
||||
our outermost transaction might be in serial mode; we check for
|
||||
this case in the retry policy implementation. */
|
||||
cmpl $0, SYM(gtm_serial_lock)(%rip)
|
||||
jnz 1f
|
||||
/* Everything is good. Run the transaction, preferably using the
|
||||
uninstrumented code path. Note that the following works because
|
||||
pr_uninstrumentedCode == a_runUninstrumentedCode. */
|
||||
andl $pr_uninstrumentedCode, %edi
|
||||
mov $a_runInstrumentedCode, %eax
|
||||
cmovnz %edi, %eax
|
||||
ret
|
||||
/* There is a serial-mode transaction, so abort (see htm_abort()
|
||||
regarding the abort code). */
|
||||
1: xabort $0xff
|
||||
.Ltxn_abort:
|
||||
/* If it might make sense to retry the HTM fast path, let the C++
|
||||
code decide. */
|
||||
testl $(_XABORT_RETRY|_XABORT_EXPLICIT), %eax
|
||||
jz .Lno_htm
|
||||
orl $pr_HTMRetryableAbort, %edi
|
||||
/* Let the C++ code handle the retry policy. */
|
||||
.Lno_htm:
|
||||
#endif
|
||||
leaq 8(%rsp), %rax
|
||||
subq $56, %rsp
|
||||
cfi_def_cfa_offset(64)
|
||||
movq %rax, (%rsp)
|
||||
movq %rbx, 8(%rsp)
|
||||
movq %rbp, 16(%rsp)
|
||||
movq %r12, 24(%rsp)
|
||||
movq %r13, 32(%rsp)
|
||||
movq %r14, 40(%rsp)
|
||||
movq %r15, 48(%rsp)
|
||||
movq %rsp, %rsi
|
||||
subq $72, %rsp
|
||||
cfi_adjust_cfa_offset(72)
|
||||
/* Store edi for future HTM fast path retries. We use a stack slot
|
||||
lower than the jmpbuf so that the jmpbuf's rip field will overlap
|
||||
with the proper return address on the stack. */
|
||||
movl %edi, 8(%rsp)
|
||||
/* Save the jmpbuf for any non-HTM-fastpath execution method.
|
||||
Because rsp-based addressing is 1 byte larger and we've got rax
|
||||
handy, use it. */
|
||||
movq %rax, -64(%rax)
|
||||
movq %rbx, -56(%rax)
|
||||
movq %rbp, -48(%rax)
|
||||
movq %r12, -40(%rax)
|
||||
movq %r13, -32(%rax)
|
||||
movq %r14, -24(%rax)
|
||||
movq %r15, -16(%rax)
|
||||
leaq -64(%rax), %rsi
|
||||
call SYM(GTM_begin_transaction)
|
||||
addq $56, %rsp
|
||||
cfi_def_cfa_offset(8)
|
||||
movl 8(%rsp), %edi
|
||||
addq $72, %rsp
|
||||
cfi_adjust_cfa_offset(-72)
|
||||
#ifdef HAVE_AS_RTM
|
||||
/* If a_tryHTMFastPath was returned, then we need to retry the
|
||||
fast path. We also restore edi and set pr_HTMRetriedAfterAbort
|
||||
to state that we have retried the fast path already (it's harmless
|
||||
if this bit is set even if we don't retry the fast path because it
|
||||
is checked iff pr_HTMRetryableAbort is set). We clear
|
||||
pr_HTMRetryableAbort because it applies to a previous HW
|
||||
transaction attempt. */
|
||||
cmpl $a_tryHTMFastPath, %eax
|
||||
jnz 2f
|
||||
andl $(0xffffffff-pr_HTMRetryableAbort), %edi
|
||||
orl $pr_HTMRetriedAfterAbort, %edi
|
||||
jmp .Lhtm_fastpath
|
||||
2:
|
||||
#endif
|
||||
#else
|
||||
leal 4(%esp), %ecx
|
||||
movl 4(%esp), %eax
|
||||
|
|
|
@ -70,6 +70,10 @@ cpu_relax (void)
|
|||
// See gtm_thread::begin_transaction for how these functions are used.
|
||||
#ifdef HAVE_AS_RTM
|
||||
#define USE_HTM_FASTPATH
|
||||
#ifdef __x86_64__
|
||||
// Use the custom fastpath in ITM_beginTransaction.
|
||||
#define HTM_CUSTOM_FASTPATH
|
||||
#endif
|
||||
|
||||
static inline bool
|
||||
htm_available ()
|
||||
|
|
|
@ -72,7 +72,9 @@ typedef enum
|
|||
inIrrevocableTransaction
|
||||
} _ITM_howExecuting;
|
||||
|
||||
/* Values to describe properties of code, passed in to beginTransaction */
|
||||
/* Values to describe properties of code, passed in to beginTransaction.
|
||||
Some of these constants are duplicated in some of the ITM_beginTransaction
|
||||
implementations, so update those too when applying any changes. */
|
||||
typedef enum
|
||||
{
|
||||
pr_instrumentedCode = 0x0001,
|
||||
|
@ -95,10 +97,16 @@ typedef enum
|
|||
pr_exceptionBlock = 0x1000,
|
||||
pr_hasElse = 0x2000,
|
||||
pr_readOnly = 0x4000,
|
||||
pr_hasNoSimpleReads = 0x400000
|
||||
pr_hasNoSimpleReads = 0x400000,
|
||||
/* These are not part of the ABI but used for custom HTM fast paths. See
|
||||
ITM_beginTransaction and gtm_thread::begin_transaction. */
|
||||
pr_HTMRetryableAbort = 0x800000,
|
||||
pr_HTMRetriedAfterAbort = 0x1000000
|
||||
} _ITM_codeProperties;
|
||||
|
||||
/* Result from startTransaction that describes what actions to take. */
|
||||
/* Result from beginTransaction that describes what actions to take.
|
||||
Some of these constants are duplicated in some of the ITM_beginTransaction
|
||||
implementations, so update those too when applying any changes. */
|
||||
typedef enum
|
||||
{
|
||||
a_runInstrumentedCode = 0x01,
|
||||
|
@ -106,6 +114,7 @@ typedef enum
|
|||
a_saveLiveVariables = 0x04,
|
||||
a_restoreLiveVariables = 0x08,
|
||||
a_abortTransaction = 0x10,
|
||||
a_tryHTMFastPath = 0x20
|
||||
} _ITM_actions;
|
||||
|
||||
typedef struct
|
||||
|
|
|
@ -87,6 +87,14 @@ enum gtm_restart_reason
|
|||
#include "dispatch.h"
|
||||
#include "containers.h"
|
||||
|
||||
// UPFX yields the user label prefix as a string literal, so that it can be
// pasted in front of a symbol name inside an __asm__ ("...") name specifier
// (it is used that way for gtm_serial_lock, GTM_begin_transaction and
// gtm_htm_fastpath).  If __USER_LABEL_PREFIX__ is not defined, UPFX expands
// to nothing and the bare symbol name is used.
// The extra UPFX1 level is required: it forces __USER_LABEL_PREFIX__ to be
// macro-expanded before UPFX2 applies the # (stringify) operator; applying #
// directly would produce the literal text "__USER_LABEL_PREFIX__".
#ifdef __USER_LABEL_PREFIX__
|
||||
# define UPFX UPFX1(__USER_LABEL_PREFIX__)
|
||||
# define UPFX1(t) UPFX2(t)
|
||||
# define UPFX2(t) #t
|
||||
#else
|
||||
# define UPFX
|
||||
#endif
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
// This type is private to alloc.c, but needs to be defined so that
|
||||
|
@ -230,6 +238,7 @@ struct gtm_thread
|
|||
// be used for the next iteration of the transaction.
|
||||
// Only restart_total is reset to zero when the transaction commits, the
|
||||
// other counters are total values for all previously executed transactions.
|
||||
// restart_total is also used by the HTM fastpath in a different way.
|
||||
uint32_t restart_reason[NUM_RESTARTS];
|
||||
uint32_t restart_total;
|
||||
|
||||
|
@ -247,7 +256,9 @@ struct gtm_thread
|
|||
// The lock that provides access to serial mode. Non-serialized
|
||||
// transactions acquire read locks; a serialized transaction acquires
|
||||
// a write lock.
|
||||
static gtm_rwlock serial_lock;
|
||||
// Accessed from assembly language, thus the "asm" specifier on
|
||||
// the name, avoiding complex name mangling.
|
||||
static gtm_rwlock serial_lock __asm__(UPFX "gtm_serial_lock");
|
||||
|
||||
// The head of the list of all threads' transactions.
|
||||
static gtm_thread *list_of_threads;
|
||||
|
@ -277,15 +288,8 @@ struct gtm_thread
|
|||
|
||||
// Invoked from assembly language, thus the "asm" specifier on
|
||||
// the name, avoiding complex name mangling.
|
||||
#ifdef __USER_LABEL_PREFIX__
|
||||
#define UPFX1(t) UPFX(t)
|
||||
#define UPFX(t) #t
|
||||
static uint32_t begin_transaction(uint32_t, const gtm_jmpbuf *)
|
||||
__asm__(UPFX1(__USER_LABEL_PREFIX__) "GTM_begin_transaction") ITM_REGPARM;
|
||||
#else
|
||||
static uint32_t begin_transaction(uint32_t, const gtm_jmpbuf *)
|
||||
__asm__("GTM_begin_transaction") ITM_REGPARM;
|
||||
#endif
|
||||
__asm__(UPFX "GTM_begin_transaction") ITM_REGPARM;
|
||||
// In eh_cpp.cc
|
||||
void revert_cpp_exceptions (gtm_transaction_cp *cp = 0);
|
||||
|
||||
|
@ -338,7 +342,9 @@ extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask);
|
|||
|
||||
// Control variable for the HTM fastpath that uses serial mode as fallback.
|
||||
// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction.
|
||||
extern uint32_t htm_fastpath;
|
||||
// Accessed from assembly language, thus the "asm" specifier on
|
||||
// the name, avoiding complex name mangling.
|
||||
extern uint32_t htm_fastpath __asm__(UPFX "gtm_htm_fastpath");
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue