diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index 05439ce394b..a099037179b 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -22,6 +22,22 @@ . */ +/* AArch64 128-bit lock-free atomic implementation. + + 128-bit atomics are now lock-free for all AArch64 architecture versions. + This is backwards compatible with existing binaries (as we swap all uses + of 128-bit atomics via an ifunc) and gives better performance than locking + atomics. + + 128-bit atomic loads use an exclusive loop if LSE2 is not supported. + This results in an implicit store which is invisible to software as long + as the given address is writeable. Since all other atomics have explicit + writes, this will be true when using atomics in actual code. + + The libat_*_16 entry points are ARMv8.0. + The libat_*_16_i1 entry points are used when LSE2 is available. */ + + .arch armv8-a+lse #define ENTRY(name) \ @@ -37,6 +53,10 @@ name: \ .cfi_endproc; \ .size name, .-name; +#define ALIAS(alias,name) \ + .global alias; \ + .set alias, name; + #define res0 x0 #define res1 x1 #define in0 x2 @@ -70,6 +90,24 @@ name: \ #define SEQ_CST 5 +ENTRY (libat_load_16) + mov x5, x0 + cbnz w1, 2f + + /* RELAXED. */ +1: ldxp res0, res1, [x5] + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret + + /* ACQUIRE/CONSUME/SEQ_CST. */ +2: ldaxp res0, res1, [x5] + stxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_load_16) + + ENTRY (libat_load_16_i1) cbnz w1, 1f @@ -93,6 +131,23 @@ ENTRY (libat_load_16_i1) END (libat_load_16_i1) +ENTRY (libat_store_16) + cbnz w4, 2f + + /* RELAXED. */ +1: ldxp xzr, tmp0, [x0] + stxp w4, in0, in1, [x0] + cbnz w4, 1b + ret + + /* RELEASE/SEQ_CST. */ +2: ldxp xzr, tmp0, [x0] + stlxp w4, in0, in1, [x0] + cbnz w4, 2b + ret +END (libat_store_16) + + ENTRY (libat_store_16_i1) cbnz w4, 1f @@ -101,14 +156,14 @@ ENTRY (libat_store_16_i1) ret /* RELEASE/SEQ_CST. 
*/ -1: ldaxp xzr, tmp0, [x0] +1: ldxp xzr, tmp0, [x0] stlxp w4, in0, in1, [x0] cbnz w4, 1b ret END (libat_store_16_i1) -ENTRY (libat_exchange_16_i1) +ENTRY (libat_exchange_16) mov x5, x0 cbnz w4, 2f @@ -126,22 +181,60 @@ ENTRY (libat_exchange_16_i1) stxp w4, in0, in1, [x5] cbnz w4, 3b ret -4: + + /* RELEASE/ACQ_REL/SEQ_CST. */ +4: ldaxp res0, res1, [x5] + stlxp w4, in0, in1, [x5] + cbnz w4, 4b + ret +END (libat_exchange_16) + + +ENTRY (libat_compare_exchange_16) + ldp exp0, exp1, [x1] + cbz w4, 3f cmp w4, RELEASE - b.ne 6f + b.hs 5f - /* RELEASE. */ -5: ldxp res0, res1, [x5] - stlxp w4, in0, in1, [x5] + /* ACQUIRE/CONSUME. */ +1: ldaxp tmp0, tmp1, [x0] + cmp tmp0, exp0 + ccmp tmp1, exp1, 0, eq + csel tmp0, in0, tmp0, eq + csel tmp1, in1, tmp1, eq + stxp w4, tmp0, tmp1, [x0] + cbnz w4, 1b + beq 2f + stp tmp0, tmp1, [x1] +2: cset x0, eq + ret + + /* RELAXED. */ +3: ldxp tmp0, tmp1, [x0] + cmp tmp0, exp0 + ccmp tmp1, exp1, 0, eq + csel tmp0, in0, tmp0, eq + csel tmp1, in1, tmp1, eq + stxp w4, tmp0, tmp1, [x0] + cbnz w4, 3b + beq 4f + stp tmp0, tmp1, [x1] +4: cset x0, eq + ret + + /* RELEASE/ACQ_REL/SEQ_CST. */ +5: ldaxp tmp0, tmp1, [x0] + cmp tmp0, exp0 + ccmp tmp1, exp1, 0, eq + csel tmp0, in0, tmp0, eq + csel tmp1, in1, tmp1, eq + stlxp w4, tmp0, tmp1, [x0] cbnz w4, 5b + beq 6f + stp tmp0, tmp1, [x1] +6: cset x0, eq ret - - /* ACQ_REL/SEQ_CST. 
*/ -6: ldaxp res0, res1, [x5] - stlxp w4, in0, in1, [x5] - cbnz w4, 6b - ret -END (libat_exchange_16_i1) +END (libat_compare_exchange_16) ENTRY (libat_compare_exchange_16_i1) @@ -180,7 +273,7 @@ ENTRY (libat_compare_exchange_16_i1) END (libat_compare_exchange_16_i1) -ENTRY (libat_fetch_add_16_i1) +ENTRY (libat_fetch_add_16) mov x5, x0 cbnz w4, 2f @@ -199,10 +292,10 @@ ENTRY (libat_fetch_add_16_i1) stlxp w4, tmp0, tmp1, [x5] cbnz w4, 2b ret -END (libat_fetch_add_16_i1) +END (libat_fetch_add_16) -ENTRY (libat_add_fetch_16_i1) +ENTRY (libat_add_fetch_16) mov x5, x0 cbnz w4, 2f @@ -221,10 +314,10 @@ ENTRY (libat_add_fetch_16_i1) stlxp w4, res0, res1, [x5] cbnz w4, 2b ret -END (libat_add_fetch_16_i1) +END (libat_add_fetch_16) -ENTRY (libat_fetch_sub_16_i1) +ENTRY (libat_fetch_sub_16) mov x5, x0 cbnz w4, 2f @@ -243,10 +336,10 @@ ENTRY (libat_fetch_sub_16_i1) stlxp w4, tmp0, tmp1, [x5] cbnz w4, 2b ret -END (libat_fetch_sub_16_i1) +END (libat_fetch_sub_16) -ENTRY (libat_sub_fetch_16_i1) +ENTRY (libat_sub_fetch_16) mov x5, x0 cbnz w4, 2f @@ -265,10 +358,10 @@ ENTRY (libat_sub_fetch_16_i1) stlxp w4, res0, res1, [x5] cbnz w4, 2b ret -END (libat_sub_fetch_16_i1) +END (libat_sub_fetch_16) -ENTRY (libat_fetch_or_16_i1) +ENTRY (libat_fetch_or_16) mov x5, x0 cbnz w4, 2f @@ -287,10 +380,10 @@ ENTRY (libat_fetch_or_16_i1) stlxp w4, tmp0, tmp1, [x5] cbnz w4, 2b ret -END (libat_fetch_or_16_i1) +END (libat_fetch_or_16) -ENTRY (libat_or_fetch_16_i1) +ENTRY (libat_or_fetch_16) mov x5, x0 cbnz w4, 2f @@ -309,10 +402,10 @@ ENTRY (libat_or_fetch_16_i1) stlxp w4, res0, res1, [x5] cbnz w4, 2b ret -END (libat_or_fetch_16_i1) +END (libat_or_fetch_16) -ENTRY (libat_fetch_and_16_i1) +ENTRY (libat_fetch_and_16) mov x5, x0 cbnz w4, 2f @@ -331,10 +424,10 @@ ENTRY (libat_fetch_and_16_i1) stlxp w4, tmp0, tmp1, [x5] cbnz w4, 2b ret -END (libat_fetch_and_16_i1) +END (libat_fetch_and_16) -ENTRY (libat_and_fetch_16_i1) +ENTRY (libat_and_fetch_16) mov x5, x0 cbnz w4, 2f @@ -353,10 +446,10 @@ ENTRY 
(libat_and_fetch_16_i1) stlxp w4, res0, res1, [x5] cbnz w4, 2b ret -END (libat_and_fetch_16_i1) +END (libat_and_fetch_16) -ENTRY (libat_fetch_xor_16_i1) +ENTRY (libat_fetch_xor_16) mov x5, x0 cbnz w4, 2f @@ -375,10 +468,10 @@ ENTRY (libat_fetch_xor_16_i1) stlxp w4, tmp0, tmp1, [x5] cbnz w4, 2b ret -END (libat_fetch_xor_16_i1) +END (libat_fetch_xor_16) -ENTRY (libat_xor_fetch_16_i1) +ENTRY (libat_xor_fetch_16) mov x5, x0 cbnz w4, 2f @@ -397,10 +490,10 @@ ENTRY (libat_xor_fetch_16_i1) stlxp w4, res0, res1, [x5] cbnz w4, 2b ret -END (libat_xor_fetch_16_i1) +END (libat_xor_fetch_16) -ENTRY (libat_fetch_nand_16_i1) +ENTRY (libat_fetch_nand_16) mov x5, x0 mvn in0, in0 mvn in1, in1 @@ -421,10 +514,10 @@ ENTRY (libat_fetch_nand_16_i1) stlxp w4, tmp0, tmp1, [x5] cbnz w4, 2b ret -END (libat_fetch_nand_16_i1) +END (libat_fetch_nand_16) -ENTRY (libat_nand_fetch_16_i1) +ENTRY (libat_nand_fetch_16) mov x5, x0 mvn in0, in0 mvn in1, in1 @@ -445,21 +538,38 @@ ENTRY (libat_nand_fetch_16_i1) stlxp w4, res0, res1, [x5] cbnz w4, 2b ret -END (libat_nand_fetch_16_i1) +END (libat_nand_fetch_16) -ENTRY (libat_test_and_set_16_i1) - mov w2, 1 - cbnz w1, 2f +/* __atomic_test_and_set is always inlined, so this entry is unused and + only required for completeness. */ +ENTRY (libat_test_and_set_16) - /* RELAXED. */ - swpb w0, w2, [x0] + /* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ + mov x5, x0 +1: ldaxrb w0, [x5] + stlxrb w4, w2, [x5] + cbnz w4, 1b ret +END (libat_test_and_set_16) - /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ -2: swpalb w0, w2, [x0] - ret -END (libat_test_and_set_16_i1) + +/* Alias entry points which are the same in baseline and LSE2. 
*/ + +ALIAS (libat_exchange_16_i1, libat_exchange_16) +ALIAS (libat_fetch_add_16_i1, libat_fetch_add_16) +ALIAS (libat_add_fetch_16_i1, libat_add_fetch_16) +ALIAS (libat_fetch_sub_16_i1, libat_fetch_sub_16) +ALIAS (libat_sub_fetch_16_i1, libat_sub_fetch_16) +ALIAS (libat_fetch_or_16_i1, libat_fetch_or_16) +ALIAS (libat_or_fetch_16_i1, libat_or_fetch_16) +ALIAS (libat_fetch_and_16_i1, libat_fetch_and_16) +ALIAS (libat_and_fetch_16_i1, libat_and_fetch_16) +ALIAS (libat_fetch_xor_16_i1, libat_fetch_xor_16) +ALIAS (libat_xor_fetch_16_i1, libat_xor_fetch_16) +ALIAS (libat_fetch_nand_16_i1, libat_fetch_nand_16) +ALIAS (libat_nand_fetch_16_i1, libat_nand_fetch_16) +ALIAS (libat_test_and_set_16_i1, libat_test_and_set_16) /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */ diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index 9747accd88f..ac4d922ca5c 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -35,12 +35,13 @@ #endif #define IFUNC_NCOND(N) (1) -#if N == 16 && IFUNC_ALT != 0 +#endif /* HAVE_IFUNC */ + +/* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */ +#if N == 16 # define DONE 1 #endif -#endif /* HAVE_IFUNC */ - #ifdef HWCAP_USCAT #define MIDR_IMPLEMENTOR(midr) (((midr) >> 24) & 255)