aarch64: Avoid same input and output Z register for gather loads
The architecture recommends that load-gather instructions avoid using the same Z register for the load address and the destination, and the Software Optimization Guides for Arm cores recommend that as well. This means that for code like: svuint64_t food (svbool_t p, uint64_t *in, svint64_t offsets, svuint64_t a) { return svadd_u64_x (p, a, svld1_gather_offset(p, in, offsets)); } we'll want to avoid generating the current: food: ld1d z0.d, p0/z, [x0, z0.d] // Z0 reused as input and output. add z0.d, z1.d, z0.d ret However, we still want to avoid generating extra moves where there were none before, so the tight aarch64-sve-acle.exp tests for load gathers should still pass as they are. This patch implements that recommendation for the load gather patterns by: * duplicating the alternatives * marking the output operand as early clobber * Tying the input Z register operand in the original alternatives to 0 * Penalising the original alternatives with '?' This results in a large-ish patch in terms of diff lines but the new compact syntax (thanks Tamar) makes it quite a readable and regular change. The benchmark numbers on a Neoverse V1 on fprate look okay: diff 503.bwaves_r 0.00% 507.cactuBSSN_r 0.00% 508.namd_r 0.00% 510.parest_r 0.55% 511.povray_r 0.22% 519.lbm_r 0.00% 521.wrf_r 0.00% 526.blender_r 0.00% 527.cam4_r 0.56% 538.imagick_r 0.00% 544.nab_r 0.00% 549.fotonik3d_r 0.00% 554.roms_r 0.00% fprate 0.10% Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (mask_gather_load<mode><v_int_container>): Add alternatives to prefer to avoid same input and output Z register. (mask_gather_load<mode><v_int_container>): Likewise. (*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked): Likewise. (*mask_gather_load<mode><v_int_container>_sxtw): Likewise. (*mask_gather_load<mode><v_int_container>_uxtw): Likewise. (@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>): Likewise. 
(@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>): Likewise. (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> <SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked): Likewise. (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> <SVE_2BHSI:mode>_sxtw): Likewise. (*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode> <SVE_2BHSI:mode>_uxtw): Likewise. (@aarch64_ldff1_gather<mode>): Likewise. (@aarch64_ldff1_gather<mode>): Likewise. (*aarch64_ldff1_gather<mode>_sxtw): Likewise. (*aarch64_ldff1_gather<mode>_uxtw): Likewise. (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode> <VNx4_NARROW:mode>): Likewise. (@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> <VNx2_NARROW:mode>): Likewise. (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> <VNx2_NARROW:mode>_sxtw): Likewise. (*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode> <VNx2_NARROW:mode>_uxtw): Likewise. * config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt<mode>): Likewise. (@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode> <SVE_PARTIAL_I:mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/gather_earlyclobber.c: New test. * gcc.target/aarch64/sve2/gather_earlyclobber.c: New test.
This commit is contained in:
parent
4d9d207c66
commit
b375c5340b
4 changed files with 263 additions and 71 deletions
|
@ -1429,12 +1429,18 @@
|
|||
UNSPEC_LD1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[w, Z, w, Ui1, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%2.s]
|
||||
[w, vgw, w, Ui1, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
|
||||
[w, rk, w, Z, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[w, rk, w, Ui1, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[w, rk, w, Z, i, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[w, rk, w, Ui1, i, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
[&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
|
||||
[?w, Z, 0, Ui1, Ui1, Upl] ^
|
||||
[&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
|
||||
[?w, vgw, 0, Ui1, Ui1, Upl] ^
|
||||
[&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[?w, rk, 0, Z, Ui1, Upl] ^
|
||||
[&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[?w, rk, 0, Ui1, Ui1, Upl] ^
|
||||
[&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[?w, rk, 0, Z, i, Upl] ^
|
||||
[&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
[?w, rk, 0, Ui1, i, Upl] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1451,11 +1457,15 @@
|
|||
(mem:BLK (scratch))]
|
||||
UNSPEC_LD1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[w, Z, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%2.d]
|
||||
[w, vgd, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
|
||||
[w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
|
||||
[?w, Z, 0, i, Ui1, Upl] ^
|
||||
[&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
|
||||
[?w, vgd, 0, i, Ui1, Upl] ^
|
||||
[&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
|
||||
[?w, rk, 0, i, Ui1, Upl] ^
|
||||
[&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
[?w, rk, 0, i, i, Upl] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1475,9 +1485,11 @@
|
|||
(mem:BLK (scratch))]
|
||||
UNSPEC_LD1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
|
||||
[w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1503,9 +1515,11 @@
|
|||
(mem:BLK (scratch))]
|
||||
UNSPEC_LD1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1528,9 +1542,11 @@
|
|||
(mem:BLK (scratch))]
|
||||
UNSPEC_LD1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1563,13 +1579,19 @@
|
|||
UNSPEC_LD1_GATHER))]
|
||||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5, 6 ]
|
||||
[w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
|
||||
[w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
|
||||
[w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5, 6]
|
||||
[&w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
|
||||
[?w, Z, 0, Ui1, Ui1, Upl, UplDnm] ^
|
||||
[&w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
|
||||
[?w, vg<SVE_4BHI:Vesize>, 0, Ui1, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[?w, rk, 0, Z, i, Upl, UplDnm] ^
|
||||
[&w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
[?w, rk, 0, Ui1, i, Upl, UplDnm] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1595,10 +1617,14 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5, 6]
|
||||
[w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
|
||||
[w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
|
||||
[w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
|
||||
[w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
[&w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
|
||||
[?w, Z, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
|
||||
[?w, vg<SVE_2BHSI:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
|
||||
[?w, rk, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
[?w, rk, 0, i, i, Upl, UplDnm] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1627,8 +1653,10 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
|
||||
[w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
|
||||
{
|
||||
|
@ -1660,8 +1688,10 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
|
||||
{
|
||||
|
@ -1690,8 +1720,10 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[7])"
|
||||
{
|
||||
|
@ -1721,13 +1753,19 @@
|
|||
(reg:VNx16BI FFRT_REGNUM)]
|
||||
UNSPEC_LDFF1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s]
|
||||
[w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1]
|
||||
[w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[&w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s]
|
||||
[?w, Z, 0, i, Ui1, Upl] ^
|
||||
[&w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1]
|
||||
[?w, vgw, 0, i, Ui1, Upl] ^
|
||||
[&w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[?w, rk, 0, Z, Ui1, Upl] ^
|
||||
[&w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[?w, rk, 0, Ui1, Ui1, Upl] ^
|
||||
[&w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[?w, rk, 0, Z, i, Upl] ^
|
||||
[&w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
[?w, rk, 0, Ui1, i, Upl] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1745,11 +1783,15 @@
|
|||
(reg:VNx16BI FFRT_REGNUM)]
|
||||
UNSPEC_LDFF1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d]
|
||||
[w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1]
|
||||
[w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d]
|
||||
[w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5 ]
|
||||
[&w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d]
|
||||
[?w, Z, 0, i, Ui1, Upl ] ^
|
||||
[&w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1]
|
||||
[?w, vgd, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1772,8 +1814,10 @@
|
|||
UNSPEC_LDFF1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1797,8 +1841,10 @@
|
|||
UNSPEC_LDFF1_GATHER))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1833,12 +1879,18 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5, 6]
|
||||
[w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
|
||||
[w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
|
||||
[w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
[&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
|
||||
[?w, Z, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
|
||||
[?w, vg<VNx4_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
|
||||
[?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
|
||||
[?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
|
||||
[?w, rk, 0, Z, i, Upl, UplDnm] ^
|
||||
[&w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
|
||||
[?w, rk, 0, Ui1, i, Upl, UplDnm] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1865,10 +1917,14 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5, 6]
|
||||
[w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
|
||||
[w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
|
||||
[w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
|
||||
[w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
[&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
|
||||
[?w, Z, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
|
||||
[?w, vg<VNx2_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
|
||||
[?w, rk, 0, i, Ui1, Upl, UplDnm] ^
|
||||
[&w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
|
||||
[?w, rk, 0, i, i, Upl, UplDnm] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[6])"
|
||||
{
|
||||
|
@ -1899,8 +1955,10 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
|
||||
{
|
||||
|
@ -1929,8 +1987,10 @@
|
|||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE"
|
||||
{@ [cons: =0, 1, 2, 3, 4, 5]
|
||||
[w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
|
||||
[?w, rk, 0, i, Ui1, Upl ] ^
|
||||
[&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
|
||||
[?w, rk, 0, i, i, Upl ] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[7])"
|
||||
{
|
||||
|
|
|
@ -111,8 +111,10 @@
|
|||
UNSPEC_LDNT1_GATHER))]
|
||||
"TARGET_SVE2"
|
||||
{@ [cons: =0, 1, 2, 3]
|
||||
[w, Upl, Z, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
|
||||
[w, Upl, r, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]
|
||||
[&w, Upl, Z, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
|
||||
[?w, Upl, Z, 0 ] ^
|
||||
[&w, Upl, r, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]
|
||||
[?w, Upl, r, 0 ] ^
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -132,8 +134,10 @@
|
|||
"TARGET_SVE2
|
||||
&& (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
|
||||
{@ [cons: =0, 1, 2, 3, 4]
|
||||
[w, Upl, Z, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
|
||||
[w, Upl, r, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]
|
||||
[&w, Upl, Z, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
|
||||
[?w, Upl, Z, 0, UplDnm] ^
|
||||
[&w, Upl, r, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]
|
||||
[?w, Upl, r, 0, UplDnm] ^
|
||||
}
|
||||
"&& !CONSTANT_P (operands[4])"
|
||||
{
|
||||
|
|
96
gcc/testsuite/gcc.target/aarch64/sve/gather_earlyclobber.c
Normal file
96
gcc/testsuite/gcc.target/aarch64/sve/gather_earlyclobber.c
Normal file
|
@ -0,0 +1,96 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
/*
|
||||
** food:
|
||||
** ld1d (?:z[1-9][0-9]*)\.d, p0/z, \[x0, z0\.d\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* Add A to the result of svld1_gather_offset on IN and OFFSETS, all under
   predicate P.  The expected-assembly pattern for this function only
   accepts an ld1d whose destination is not z0, the register that appears
   in the address.  */
svuint64_t
|
||||
food (svbool_t p, uint64_t *in, svint64_t offsets, svuint64_t a)
|
||||
{
|
||||
return svadd_u64_x (p, a, svld1_gather_offset(p, in, offsets));
|
||||
}
|
||||
|
||||
/*
|
||||
** foodb:
|
||||
** ld1d (?:z[1-9][0-9]*)\.d, p0/z, \[z0\.d\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* Add A to the result of svld1_gather_u64 on the vector BASES, under
   predicate P.  The expected-assembly pattern for this function only
   accepts an ld1d whose destination is not z0, the register used as the
   address.  */
svuint64_t
|
||||
foodb (svbool_t p, svuint64_t bases, svuint64_t a)
|
||||
{
|
||||
return svadd_u64_x (p, a, svld1_gather_u64 (p, bases));
|
||||
}
|
||||
|
||||
/*
|
||||
** foodff:
|
||||
** ldff1d (?:z[1-9][0-9]*)\.d, p0/z, \[z0\.d\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* Same shape as the ld1d vector-bases test, but using the
   svldff1_gather_u64 intrinsic; the expected ldff1d must not write z0,
   the register used as the address.  */
svuint64_t
|
||||
foodff (svbool_t p, svuint64_t bases, svuint64_t a)
|
||||
{
|
||||
return svadd_u64_x (p, a, svldff1_gather_u64 (p, bases));
|
||||
}
|
||||
|
||||
/*
|
||||
** foow:
|
||||
** ld1w (?:z[1-9][0-9]*)\.s, p0/z, \[z0\.s\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* 32-bit variant: add A to the result of svld1_gather_u32 on BASES under
   predicate P.  The expected ld1w must not write z0, the register used as
   the address.  */
svuint32_t
|
||||
foow (svbool_t p, svuint32_t bases, svuint32_t a)
|
||||
{
|
||||
return svadd_u32_x (p, a, svld1_gather_u32 (p, bases));
|
||||
}
|
||||
|
||||
/*
|
||||
** foowff:
|
||||
** ldff1w (?:z[1-9][0-9]*)\.s, p0/z, \[z0\.s\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* 32-bit variant using svldff1_gather_u32: add A to the gathered value
   under predicate P.  The expected ldff1w must not write z0, the register
   used as the address.  */
svuint32_t
|
||||
foowff (svbool_t p, svuint32_t bases, svuint32_t a)
|
||||
{
|
||||
return svadd_u32_x (p, a, svldff1_gather_u32 (p, bases));
|
||||
}
|
||||
|
||||
/*
|
||||
** fooubd:
|
||||
** ld1b (?:z[1-9][0-9]*)\.d, p0/z, \[x0, z0\.d\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* Add A to the result of svld1ub_gather_offset_u64 on BASE and OFFSETS
   under predicate P.  The expected ld1b must not write z0, the register
   that appears in the address.  */
svuint64_t
|
||||
fooubd (svbool_t p, uint8_t *base, svuint64_t offsets, svuint64_t a)
|
||||
{
|
||||
return svadd_u64_x (p, a, svld1ub_gather_offset_u64 (p, base, offsets));
|
||||
}
|
||||
|
||||
/*
|
||||
** foosbd:
|
||||
** ld1sb (?:z[1-9][0-9]*)\.d, p0/z, \[x0, z0\.d\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
/* Signed counterpart of the ld1b test: add A to the result of
   svld1sb_gather_offset_s64 on BASE and OFFSETS under predicate P.  The
   expected ld1sb must not write z0, the register that appears in the
   address.  */
svint64_t
|
||||
foosbd (svbool_t p, int8_t *base, svint64_t offsets, svint64_t a)
|
||||
{
|
||||
return svadd_s64_x (p, a, svld1sb_gather_offset_s64 (p, base, offsets));
|
||||
}
|
||||
|
32
gcc/testsuite/gcc.target/aarch64/sve2/gather_earlyclobber.c
Normal file
32
gcc/testsuite/gcc.target/aarch64/sve2/gather_earlyclobber.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
/*
|
||||
** foownt:
|
||||
** ldnt1w (?:z[1-9][0-9]*)\.s, p0/z, \[z0\.s\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* Add A to the result of svldnt1_gather_u32 on BASES under predicate P.
   The expected ldnt1w must not write z0, the register used as the
   address.  */
svuint32_t
|
||||
foownt (svbool_t p, svuint32_t bases, svuint32_t a)
|
||||
{
|
||||
return svadd_u32_x (p, a, svldnt1_gather_u32 (p, bases));
|
||||
}
|
||||
|
||||
/*
|
||||
** foodbnt:
|
||||
** ldnt1d (?:z[1-9][0-9]*)\.d, p0/z, \[z0\.d\]
|
||||
** ...
|
||||
** ret
|
||||
*/
|
||||
|
||||
/* 64-bit variant: add A to the result of svldnt1_gather_u64 on BASES
   under predicate P.  The expected ldnt1d must not write z0, the register
   used as the address.  */
svuint64_t
|
||||
foodbnt (svbool_t p, svuint64_t bases, svuint64_t a)
|
||||
{
|
||||
return svadd_u64_x (p, a, svldnt1_gather_u64 (p, bases));
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue