Add custom HTM fast path for RTM on x86_64.

* libitm_i.h (gtm_thread): Assign an asm name to serial_lock.
	(htm_fastpath): Assign an asm name.
	* libitm.h (_ITM_codeProperties): Add non-ABI flags used by custom
	HTM fast paths.
	(_ITM_actions): Likewise.
	* config/x86/target.h (HTM_CUSTOM_FASTPATH): Enable custom fastpath on
	x86_64.
	* config/x86/sjlj.S (_ITM_beginTransaction): Add custom HTM fast path.
	* config/posix/rwlock.h (gtm_rwlock): Update comments.  Move summary
	field to the start of the structure.
	* config/linux/rwlock.h (gtm_rwlock): Update comments.
	* beginend.cc (gtm_thread::begin_transaction): Add retry policy
	handling for custom HTM fast paths.

From-SVN: r202101
This commit is contained in:
Torvald Riegel 2013-08-30 10:33:41 +00:00 committed by Torvald Riegel
parent 8595a07d8d
commit bec9ec3fc1
8 changed files with 198 additions and 31 deletions

View file

@ -1,3 +1,19 @@
2013-08-30 Torvald Riegel <triegel@redhat.com>
* libitm_i.h (gtm_thread): Assign an asm name to serial_lock.
(htm_fastpath): Assign an asm name.
* libitm.h (_ITM_codeProperties): Add non-ABI flags used by custom
HTM fast paths.
(_ITM_actions): Likewise.
* config/x86/target.h (HTM_CUSTOM_FASTPATH): Enable custom fastpath on
x86_64.
* config/x86/sjlj.S (_ITM_beginTransaction): Add custom HTM fast path.
* config/posix/rwlock.h (gtm_rwlock): Update comments. Move summary
field to the start of the structure.
* config/linux/rwlock.h (gtm_rwlock): Update comments.
* beginend.cc (gtm_thread::begin_transaction): Add retry policy
handling for custom HTM fast paths.
2013-08-14 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Revert:
2013-08-02 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>

View file

@ -165,7 +165,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
if (unlikely(prop & pr_undoLogCode))
GTM_fatal("pr_undoLogCode not supported");
#if defined(USE_HTM_FASTPATH) && !defined(HTM_CUSTOM_FASTPATH)
#ifdef USE_HTM_FASTPATH
// HTM fastpath. Only chosen in the absence of transaction_cancel to allow
// using an uninstrumented code path.
// The fastpath is enabled only by dispatch_htm's method group, which uses
@ -187,6 +187,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
// indeed in serial mode, and HW transactions should never need serial mode
// for any internal changes (e.g., they never abort visibly to the STM code
// and thus do not trigger the standard retry handling).
#ifndef HTM_CUSTOM_FASTPATH
if (likely(htm_fastpath && (prop & pr_hasNoAbort)))
{
for (uint32_t t = htm_fastpath; t; t--)
@ -237,6 +238,49 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
}
}
}
#else
// If we have a custom HTM fastpath in ITM_beginTransaction, we implement
// just the retry policy here. We communicate with the custom fastpath
// through additional property bits and return codes, and either transfer
// control back to the custom fastpath or run the fallback mechanism. The
// fastpath synchronization algorithm itself is the same.
// pr_HTMRetryableAbort states that a HW transaction started by the custom
// HTM fastpath aborted, and that we thus have to decide whether to retry
// the fastpath (returning a_tryHTMFastPath) or just proceed with the
// fallback method.
if (likely(htm_fastpath && (prop & pr_HTMRetryableAbort)))
{
tx = gtm_thr();
if (unlikely(tx == NULL))
{
// See below.
tx = new gtm_thread();
set_gtm_thr(tx);
}
// If this is the first abort, reset the retry count. We abuse
// restart_total for the retry count, which is fine because our only
// other fallback will use serial transactions, which don't use
// restart_total but will reset it when committing.
if (!(prop & pr_HTMRetriedAfterAbort))
tx->restart_total = htm_fastpath;
if (--tx->restart_total > 0)
{
// Wait until any concurrent serial-mode transactions have finished.
// Essentially the same code as above.
if (serial_lock.is_write_locked())
{
if (tx->nesting > 0)
goto stop_custom_htm_fastpath;
serial_lock.read_lock(tx);
serial_lock.read_unlock(tx);
}
// Let ITM_beginTransaction retry the custom HTM fastpath.
return a_tryHTMFastPath;
}
}
stop_custom_htm_fastpath:
#endif
#endif
tx = gtm_thr();

View file

@ -39,6 +39,11 @@ struct gtm_thread;
//
// In this implementation, writers are given highest priority access but
// read-to-write upgrades do not have a higher priority than writers.
//
// Do not change the layout of this class; it must remain a POD type with
// standard layout, and the WRITERS field must be first (i.e., so the
// assembler code can assume that its address is equal to the address of the
// respective instance of the class).
class gtm_rwlock
{

View file

@ -44,19 +44,25 @@ struct gtm_thread;
//
// In this implementation, writers are given highest priority access but
// read-to-write upgrades do not have a higher priority than writers.
//
// Do not change the layout of this class; it must remain a POD type with
// standard layout, and the SUMMARY field must be first (i.e., so the
// assembler code can assume that its address is equal to the address of the
// respective instance of the class).
class gtm_rwlock
{
pthread_mutex_t mutex; // Held if manipulating any field.
pthread_cond_t c_readers; // Readers wait here
pthread_cond_t c_writers; // Writers wait here for writers
pthread_cond_t c_confirmed_writers; // Writers wait here for readers
static const unsigned a_writer = 1; // An active writer.
static const unsigned w_writer = 2; // The w_writers field != 0
static const unsigned w_reader = 4; // The w_readers field != 0
std::atomic<unsigned int> summary; // Bitmask of the above.
pthread_mutex_t mutex; // Held if manipulating any field.
pthread_cond_t c_readers; // Readers wait here
pthread_cond_t c_writers; // Writers wait here for writers
pthread_cond_t c_confirmed_writers; // Writers wait here for readers
unsigned int a_readers; // Nr active readers as observed by a writer
unsigned int w_readers; // Nr waiting readers
unsigned int w_writers; // Nr waiting writers

View file

@ -24,6 +24,7 @@
#include "asmcfi.h"
#include "config.h"
#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b
@ -52,6 +53,19 @@
# endif
#endif
/* These are duplicates of the canonical definitions in libitm.h. Note that
the code relies on pr_uninstrumentedCode == a_runUninstrumentedCode. */
#define pr_uninstrumentedCode 0x02
#define pr_hasNoAbort 0x08
#define pr_HTMRetryableAbort 0x800000
#define pr_HTMRetriedAfterAbort 0x1000000
#define a_runInstrumentedCode 0x01
#define a_runUninstrumentedCode 0x02
#define a_tryHTMFastPath 0x20
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
.text
.align 4
@ -60,20 +74,83 @@
SYM(_ITM_beginTransaction):
cfi_startproc
#ifdef __x86_64__
#ifdef HAVE_AS_RTM
/* Custom HTM fast path. We start the HW transaction here and let
gtm_thread::begin_transaction (aka GTM_begin_transaction) decide
how to proceed on aborts: We either retry the fast path, or fall
back to another execution method. RTM restores all registers after
a HW transaction abort, so we can do the SW setjmp after aborts,
and we have to because we might choose a SW fall back. However,
we have to explicitly save/restore the first argument (edi). */
cmpl $0, SYM(gtm_htm_fastpath)(%rip)
jz .Lno_htm
testl $pr_hasNoAbort, %edi
jz .Lno_htm
.Lhtm_fastpath:
xbegin .Ltxn_abort
/* Monitor the serial lock (specifically, the 32b writer/summary field
at its start), and only continue if there is no serial-mode
transaction. Note that we might be just a nested transaction and
our outermost transaction might be in serial mode; we check for
this case in the retry policy implementation. */
cmpl $0, SYM(gtm_serial_lock)(%rip)
jnz 1f
/* Everything is good. Run the transaction, preferably using the
uninstrumented code path. Note that the following works because
pr_uninstrumentedCode == a_runUninstrumentedCode. */
andl $pr_uninstrumentedCode, %edi
mov $a_runInstrumentedCode, %eax
cmovnz %edi, %eax
ret
/* There is a serial-mode transaction, so abort (see htm_abort()
regarding the abort code). */
1: xabort $0xff
.Ltxn_abort:
/* If it might make sense to retry the HTM fast path, let the C++
code decide. */
testl $(_XABORT_RETRY|_XABORT_EXPLICIT), %eax
jz .Lno_htm
orl $pr_HTMRetryableAbort, %edi
/* Let the C++ code handle the retry policy. */
.Lno_htm:
#endif
leaq 8(%rsp), %rax
subq $56, %rsp
cfi_def_cfa_offset(64)
movq %rax, (%rsp)
movq %rbx, 8(%rsp)
movq %rbp, 16(%rsp)
movq %r12, 24(%rsp)
movq %r13, 32(%rsp)
movq %r14, 40(%rsp)
movq %r15, 48(%rsp)
movq %rsp, %rsi
subq $72, %rsp
cfi_adjust_cfa_offset(72)
/* Store edi for future HTM fast path retries. We use a stack slot
lower than the jmpbuf so that the jmpbuf's rip field will overlap
with the proper return address on the stack. */
movl %edi, 8(%rsp)
/* Save the jmpbuf for any non-HTM-fastpath execution method.
Because rsp-based addressing is 1 byte larger and we've got rax
handy, use it. */
movq %rax, -64(%rax)
movq %rbx, -56(%rax)
movq %rbp, -48(%rax)
movq %r12, -40(%rax)
movq %r13, -32(%rax)
movq %r14, -24(%rax)
movq %r15, -16(%rax)
leaq -64(%rax), %rsi
call SYM(GTM_begin_transaction)
addq $56, %rsp
cfi_def_cfa_offset(8)
movl 8(%rsp), %edi
addq $72, %rsp
cfi_adjust_cfa_offset(-72)
#ifdef HAVE_AS_RTM
/* If a_tryHTMFastPath was returned, then we need to retry the
fast path. We also restore edi and set pr_HTMRetriedAfterAbort
to state that we have retried the fast path already (it's harmless
if this bit is set even if we don't retry the fast path because it
is checked iff pr_HTMRetryableAbort is set). We clear
pr_HTMRetryableAbort because it applies to a previous HW
transaction attempt. */
cmpl $a_tryHTMFastPath, %eax
jnz 2f
andl $(0xffffffff-pr_HTMRetryableAbort), %edi
orl $pr_HTMRetriedAfterAbort, %edi
jmp .Lhtm_fastpath
2:
#endif
#else
leal 4(%esp), %ecx
movl 4(%esp), %eax

View file

@ -70,6 +70,10 @@ cpu_relax (void)
// See gtm_thread::begin_transaction for how these functions are used.
#ifdef HAVE_AS_RTM
#define USE_HTM_FASTPATH
#ifdef __x86_64__
// Use the custom fastpath in ITM_beginTransaction.
#define HTM_CUSTOM_FASTPATH
#endif
static inline bool
htm_available ()

View file

@ -72,7 +72,9 @@ typedef enum
inIrrevocableTransaction
} _ITM_howExecuting;
/* Values to describe properties of code, passed in to beginTransaction */
/* Values to describe properties of code, passed in to beginTransaction.
Some of these constants are duplicated in some of the ITM_beginTransaction
implementations, so update those too when applying any changes. */
typedef enum
{
pr_instrumentedCode = 0x0001,
@ -95,10 +97,16 @@ typedef enum
pr_exceptionBlock = 0x1000,
pr_hasElse = 0x2000,
pr_readOnly = 0x4000,
pr_hasNoSimpleReads = 0x400000
pr_hasNoSimpleReads = 0x400000,
/* These are not part of the ABI but used for custom HTM fast paths. See
ITM_beginTransaction and gtm_thread::begin_transaction. */
pr_HTMRetryableAbort = 0x800000,
pr_HTMRetriedAfterAbort = 0x1000000
} _ITM_codeProperties;
/* Result from startTransaction that describes what actions to take. */
/* Result from startTransaction that describes what actions to take.
Some of these constants are duplicated in some of the ITM_beginTransaction
implementations, so update those too when applying any changes. */
typedef enum
{
a_runInstrumentedCode = 0x01,
@ -106,6 +114,7 @@ typedef enum
a_saveLiveVariables = 0x04,
a_restoreLiveVariables = 0x08,
a_abortTransaction = 0x10,
a_tryHTMFastPath = 0x20
} _ITM_actions;
typedef struct

View file

@ -87,6 +87,14 @@ enum gtm_restart_reason
#include "dispatch.h"
#include "containers.h"
#ifdef __USER_LABEL_PREFIX__
# define UPFX UPFX1(__USER_LABEL_PREFIX__)
# define UPFX1(t) UPFX2(t)
# define UPFX2(t) #t
#else
# define UPFX
#endif
namespace GTM HIDDEN {
// This type is private to alloc.c, but needs to be defined so that
@ -230,6 +238,7 @@ struct gtm_thread
// be used for the next iteration of the transaction.
// Only restart_total is reset to zero when the transaction commits, the
// other counters are total values for all previously executed transactions.
// restart_total is also used by the HTM fastpath in a different way.
uint32_t restart_reason[NUM_RESTARTS];
uint32_t restart_total;
@ -247,7 +256,9 @@ struct gtm_thread
// The lock that provides access to serial mode. Non-serialized
// transactions acquire read locks; a serialized transaction aquires
// a write lock.
static gtm_rwlock serial_lock;
// Accessed from assembly language, thus the "asm" specifier on
// the name, avoiding complex name mangling.
static gtm_rwlock serial_lock __asm__(UPFX "gtm_serial_lock");
// The head of the list of all threads' transactions.
static gtm_thread *list_of_threads;
@ -277,15 +288,8 @@ struct gtm_thread
// Invoked from assembly language, thus the "asm" specifier on
// the name, avoiding complex name mangling.
#ifdef __USER_LABEL_PREFIX__
#define UPFX1(t) UPFX(t)
#define UPFX(t) #t
static uint32_t begin_transaction(uint32_t, const gtm_jmpbuf *)
__asm__(UPFX1(__USER_LABEL_PREFIX__) "GTM_begin_transaction") ITM_REGPARM;
#else
static uint32_t begin_transaction(uint32_t, const gtm_jmpbuf *)
__asm__("GTM_begin_transaction") ITM_REGPARM;
#endif
__asm__(UPFX "GTM_begin_transaction") ITM_REGPARM;
// In eh_cpp.cc
void revert_cpp_exceptions (gtm_transaction_cp *cp = 0);
@ -338,7 +342,9 @@ extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask);
// Control variable for the HTM fastpath that uses serial mode as fallback.
// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction.
extern uint32_t htm_fastpath;
// Accessed from assembly language, thus the "asm" specifier on
// the name, avoiding complex name mangling.
extern uint32_t htm_fastpath __asm__(UPFX "gtm_htm_fastpath");
} // namespace GTM