libatomic: Enable lock-free 128-bit atomics on AArch64

Enable lock-free 128-bit atomics on AArch64.  This is backwards compatible with
existing binaries (as for these GCC always calls into libatomic, so all 128-bit
atomic uses in a process are switched), gives better performance than locking
atomics and is what most users expect.

128-bit atomic loads use a load/store exclusive loop if LSE2 is not supported.
This results in an implicit store which is invisible to software as long as the
given address is writeable (which will be true when using atomics in real code).

This doesn't yet change __atomic_is_lock_free even though all atomics are finally
lock-free on AArch64.

libatomic:
	* config/linux/aarch64/atomic_16.S: Implement lock-free ARMv8.0 atomics.
	(libat_exchange_16): Merge RELEASE and ACQ_REL/SEQ_CST cases.
	* config/linux/aarch64/host-config.h: Use atomic_16.S for baseline v8.0.
This commit is contained in:
Wilco Dijkstra 2023-12-01 17:28:57 +00:00
parent bbdb72ba29
commit 3fa689f6ed
2 changed files with 161 additions and 50 deletions

View file

@ -22,6 +22,22 @@
<http://www.gnu.org/licenses/>. */
/* AArch64 128-bit lock-free atomic implementation.
128-bit atomics are now lock-free for all AArch64 architecture versions.
This is backwards compatible with existing binaries (as we swap all uses
of 128-bit atomics via an ifunc) and gives better performance than locking
atomics.
128-bit atomic loads use an exclusive loop if LSE2 is not supported.
This results in an implicit store which is invisible to software as long
as the given address is writeable. Since all other atomics have explicit
writes, this will be true when using atomics in actual code.
The libat_<op>_16 entry points are ARMv8.0.
The libat_<op>_16_i1 entry points are used when LSE2 is available. */
.arch armv8-a+lse
#define ENTRY(name) \
@ -37,6 +53,10 @@ name: \
.cfi_endproc; \
.size name, .-name;
#define ALIAS(alias,name) \
.global alias; \
.set alias, name;
#define res0 x0
#define res1 x1
#define in0 x2
@ -70,6 +90,24 @@ name: \
#define SEQ_CST 5
ENTRY (libat_load_16)
mov x5, x0
cbnz w1, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/SEQ_CST. */
2: ldaxp res0, res1, [x5]
stxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_load_16)
ENTRY (libat_load_16_i1)
cbnz w1, 1f
@ -93,6 +131,23 @@ ENTRY (libat_load_16_i1)
END (libat_load_16_i1)
ENTRY (libat_store_16)
cbnz w4, 2f
/* RELAXED. */
1: ldxp xzr, tmp0, [x0]
stxp w4, in0, in1, [x0]
cbnz w4, 1b
ret
/* RELEASE/SEQ_CST. */
2: ldxp xzr, tmp0, [x0]
stlxp w4, in0, in1, [x0]
cbnz w4, 2b
ret
END (libat_store_16)
ENTRY (libat_store_16_i1)
cbnz w4, 1f
@ -101,14 +156,14 @@ ENTRY (libat_store_16_i1)
ret
/* RELEASE/SEQ_CST. */
1: ldaxp xzr, tmp0, [x0]
1: ldxp xzr, tmp0, [x0]
stlxp w4, in0, in1, [x0]
cbnz w4, 1b
ret
END (libat_store_16_i1)
ENTRY (libat_exchange_16_i1)
ENTRY (libat_exchange_16)
mov x5, x0
cbnz w4, 2f
@ -126,22 +181,60 @@ ENTRY (libat_exchange_16_i1)
stxp w4, in0, in1, [x5]
cbnz w4, 3b
ret
4:
/* RELEASE/ACQ_REL/SEQ_CST. */
4: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 4b
ret
END (libat_exchange_16)
ENTRY (libat_compare_exchange_16)
ldp exp0, exp1, [x1]
cbz w4, 3f
cmp w4, RELEASE
b.ne 6f
b.hs 5f
/* RELEASE. */
5: ldxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
/* ACQUIRE/CONSUME. */
1: ldaxp tmp0, tmp1, [x0]
cmp tmp0, exp0
ccmp tmp1, exp1, 0, eq
csel tmp0, in0, tmp0, eq
csel tmp1, in1, tmp1, eq
stxp w4, tmp0, tmp1, [x0]
cbnz w4, 1b
beq 2f
stp tmp0, tmp1, [x1]
2: cset x0, eq
ret
/* RELAXED. */
3: ldxp tmp0, tmp1, [x0]
cmp tmp0, exp0
ccmp tmp1, exp1, 0, eq
csel tmp0, in0, tmp0, eq
csel tmp1, in1, tmp1, eq
stxp w4, tmp0, tmp1, [x0]
cbnz w4, 3b
beq 4f
stp tmp0, tmp1, [x1]
4: cset x0, eq
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
5: ldaxp tmp0, tmp1, [x0]
cmp tmp0, exp0
ccmp tmp1, exp1, 0, eq
csel tmp0, in0, tmp0, eq
csel tmp1, in1, tmp1, eq
stlxp w4, tmp0, tmp1, [x0]
cbnz w4, 5b
beq 6f
stp tmp0, tmp1, [x1]
6: cset x0, eq
ret
/* ACQ_REL/SEQ_CST. */
6: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 6b
ret
END (libat_exchange_16_i1)
END (libat_compare_exchange_16)
ENTRY (libat_compare_exchange_16_i1)
@ -180,7 +273,7 @@ ENTRY (libat_compare_exchange_16_i1)
END (libat_compare_exchange_16_i1)
ENTRY (libat_fetch_add_16_i1)
ENTRY (libat_fetch_add_16)
mov x5, x0
cbnz w4, 2f
@ -199,10 +292,10 @@ ENTRY (libat_fetch_add_16_i1)
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_add_16_i1)
END (libat_fetch_add_16)
ENTRY (libat_add_fetch_16_i1)
ENTRY (libat_add_fetch_16)
mov x5, x0
cbnz w4, 2f
@ -221,10 +314,10 @@ ENTRY (libat_add_fetch_16_i1)
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_add_fetch_16_i1)
END (libat_add_fetch_16)
ENTRY (libat_fetch_sub_16_i1)
ENTRY (libat_fetch_sub_16)
mov x5, x0
cbnz w4, 2f
@ -243,10 +336,10 @@ ENTRY (libat_fetch_sub_16_i1)
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_sub_16_i1)
END (libat_fetch_sub_16)
ENTRY (libat_sub_fetch_16_i1)
ENTRY (libat_sub_fetch_16)
mov x5, x0
cbnz w4, 2f
@ -265,10 +358,10 @@ ENTRY (libat_sub_fetch_16_i1)
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_sub_fetch_16_i1)
END (libat_sub_fetch_16)
ENTRY (libat_fetch_or_16_i1)
ENTRY (libat_fetch_or_16)
mov x5, x0
cbnz w4, 2f
@ -287,10 +380,10 @@ ENTRY (libat_fetch_or_16_i1)
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_or_16_i1)
END (libat_fetch_or_16)
ENTRY (libat_or_fetch_16_i1)
ENTRY (libat_or_fetch_16)
mov x5, x0
cbnz w4, 2f
@ -309,10 +402,10 @@ ENTRY (libat_or_fetch_16_i1)
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_or_fetch_16_i1)
END (libat_or_fetch_16)
ENTRY (libat_fetch_and_16_i1)
ENTRY (libat_fetch_and_16)
mov x5, x0
cbnz w4, 2f
@ -331,10 +424,10 @@ ENTRY (libat_fetch_and_16_i1)
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_and_16_i1)
END (libat_fetch_and_16)
ENTRY (libat_and_fetch_16_i1)
ENTRY (libat_and_fetch_16)
mov x5, x0
cbnz w4, 2f
@ -353,10 +446,10 @@ ENTRY (libat_and_fetch_16_i1)
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_and_fetch_16_i1)
END (libat_and_fetch_16)
ENTRY (libat_fetch_xor_16_i1)
ENTRY (libat_fetch_xor_16)
mov x5, x0
cbnz w4, 2f
@ -375,10 +468,10 @@ ENTRY (libat_fetch_xor_16_i1)
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_xor_16_i1)
END (libat_fetch_xor_16)
ENTRY (libat_xor_fetch_16_i1)
ENTRY (libat_xor_fetch_16)
mov x5, x0
cbnz w4, 2f
@ -397,10 +490,10 @@ ENTRY (libat_xor_fetch_16_i1)
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_xor_fetch_16_i1)
END (libat_xor_fetch_16)
ENTRY (libat_fetch_nand_16_i1)
ENTRY (libat_fetch_nand_16)
mov x5, x0
mvn in0, in0
mvn in1, in1
@ -421,10 +514,10 @@ ENTRY (libat_fetch_nand_16_i1)
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_nand_16_i1)
END (libat_fetch_nand_16)
ENTRY (libat_nand_fetch_16_i1)
ENTRY (libat_nand_fetch_16)
mov x5, x0
mvn in0, in0
mvn in1, in1
@ -445,21 +538,38 @@ ENTRY (libat_nand_fetch_16_i1)
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_nand_fetch_16_i1)
END (libat_nand_fetch_16)
ENTRY (libat_test_and_set_16_i1)
mov w2, 1
cbnz w1, 2f
/* __atomic_test_and_set is always inlined, so this entry is unused and
only required for completeness. */
ENTRY (libat_test_and_set_16)
/* RELAXED. */
swpb w0, w2, [x0]
/* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
mov x5, x0
1: ldaxrb w0, [x5]
stlxrb w4, w2, [x5]
cbnz w4, 1b
ret
END (libat_test_and_set_16)
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: swpalb w0, w2, [x0]
ret
END (libat_test_and_set_16_i1)
/* Alias entry points which are the same in baseline and LSE2. */
ALIAS (libat_exchange_16_i1, libat_exchange_16)
ALIAS (libat_fetch_add_16_i1, libat_fetch_add_16)
ALIAS (libat_add_fetch_16_i1, libat_add_fetch_16)
ALIAS (libat_fetch_sub_16_i1, libat_fetch_sub_16)
ALIAS (libat_sub_fetch_16_i1, libat_sub_fetch_16)
ALIAS (libat_fetch_or_16_i1, libat_fetch_or_16)
ALIAS (libat_or_fetch_16_i1, libat_or_fetch_16)
ALIAS (libat_fetch_and_16_i1, libat_fetch_and_16)
ALIAS (libat_and_fetch_16_i1, libat_and_fetch_16)
ALIAS (libat_fetch_xor_16_i1, libat_fetch_xor_16)
ALIAS (libat_xor_fetch_16_i1, libat_xor_fetch_16)
ALIAS (libat_fetch_nand_16_i1, libat_fetch_nand_16)
ALIAS (libat_nand_fetch_16_i1, libat_nand_fetch_16)
ALIAS (libat_test_and_set_16_i1, libat_test_and_set_16)
/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */

View file

@ -35,12 +35,13 @@
#endif
#define IFUNC_NCOND(N) (1)
#if N == 16 && IFUNC_ALT != 0
#endif /* HAVE_IFUNC */
/* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */
#if N == 16
# define DONE 1
#endif
#endif /* HAVE_IFUNC */
#ifdef HWCAP_USCAT
#define MIDR_IMPLEMENTOR(midr) (((midr) >> 24) & 255)