diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 0cc0ae78400..9bb9c61967b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -120,7 +120,6 @@ extern int arm_coproc_mem_operand_no_writeback (rtx);
 extern int arm_coproc_mem_operand_wb (rtx, int);
 extern int neon_vector_mem_operand (rtx, int, bool);
 extern int mve_vector_mem_operand (machine_mode, rtx, bool);
-bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);
 extern int neon_struct_mem_operand (rtx);
 extern rtx *neon_vcmla_lane_prepare_operands (rtx *);
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 022ef6c3f1d..8105b39e7a4 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -13277,14 +13277,18 @@ arm_coproc_mem_operand_wb (rtx op, int wb_level)
 
   /* Match:
      (plus (reg)
-	   (const)).  */
+	   (const))
+
+     The encoded immediate for 16-bit modes is multiplied by 2,
+     while the encoded immediate for 32-bit and 64-bit modes is
+     multiplied by 4.  */
+  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
   if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
-      && INTVAL (XEXP (ind, 1)) > -1024
-      && INTVAL (XEXP (ind, 1)) < 1024
-      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
+      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
     return TRUE;
 
   return FALSE;
@@ -33578,17 +33582,4 @@ arm_mode_base_reg_class (machine_mode mode)
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
-bool
-arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1)
-{
-  if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))
-    return true;
-  else if (mode == E_BFmode)
-    return false;
-  else if ((s_register_operand (op0, mode) && MEM_P (op1))
-	   || (s_register_operand (op1, mode) && MEM_P (op0)))
-    return false;
-  return true;
-}
-
 #include "gt-arm.h"
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index c4fa116ab77..147c4a50c72 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -7289,7 +7289,9 @@
 (define_insn "*arm32_mov<mode>"
   [(set (match_operand:HFBF 0 "nonimmediate_operand" "=r,m,r,r")
	(match_operand:HFBF 1 "general_operand"	   " m,r,r,F"))]
-  "TARGET_32BIT && !TARGET_HARD_FLOAT
+  "TARGET_32BIT
+   && !TARGET_HARD_FLOAT
+   && !TARGET_HAVE_MVE
   && (	  s_register_operand (operands[0], <MODE>mode)
       || s_register_operand (operands[1], <MODE>mode))"
   "*
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index ff229aa9845..789e3332abb 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -454,10 +454,13 @@
 
 (define_memory_constraint "Uj"
  "@internal
-  In ARM/Thumb-2 state an VFP load/store address which does not support
-  writeback at all (eg vldr.16)."
+  In ARM/Thumb-2 state a VFP load/store address that supports writeback
+  for Neon but not for MVE."
 (and (match_code "mem")
-     (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)")))
+     (match_test "TARGET_32BIT")
+     (match_test "TARGET_HAVE_MVE
+		  ? arm_coproc_mem_operand_no_writeback (op)
+		  : neon_vector_mem_operand (op, 2, true)")))
 
 (define_memory_constraint "Uy"
  "@internal
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 6a2bc5a789f..72707c17929 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -387,31 +387,15 @@
    (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")]
 )
 
-(define_insn "*mov_load_vfp_hf16"
-  [(set (match_operand:HF 0 "s_register_operand" "=t")
-	(match_operand:HF 1 "memory_operand" "Uj"))]
-  "TARGET_HAVE_MVE_FLOAT"
-  "vldr.16\\t%0, %E1"
-)
-
-(define_insn "*mov_store_vfp_hf16"
-  [(set (match_operand:HF 0 "memory_operand" "=Uj")
-	(match_operand:HF 1 "s_register_operand" "t"))]
-  "TARGET_HAVE_MVE_FLOAT"
-  "vstr.16\\t%1, %E0"
-)
-
 ;; HFmode and BFmode moves
 (define_insn "*mov<mode>_vfp_<mode>16"
   [(set (match_operand:HFBF 0 "nonimmediate_operand"
-			  "= ?r,?m,t,r,t,r,t, t, Um,r")
+			  "= ?r,?m,t,r,t,r,t, t, Uj,r")
	(match_operand:HFBF 1 "general_operand"
-			  "  m,r,t,r,r,t,Dv,Um,t, F"))]
+			  "  m,r,t,r,r,t,Dv,Uj,t, F"))]
   "TARGET_32BIT
-   && TARGET_VFP_FP16INST
-   && arm_mve_mode_and_operands_type_check (<MODE>mode, operands[0],
-					    operands[1])
+   && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
   && (s_register_operand (operands[0], <MODE>mode)
      || s_register_operand (operands[1], <MODE>mode))"
 {
@@ -430,9 +414,15 @@
     case 6: /* S register from immediate.  */
       return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";
     case 7: /* S register from memory.  */
-      return \"vld1.16\\t{%z0}, %A1\";
+      if (TARGET_HAVE_MVE)
+	return \"vldr.16\\t%0, %1\";
+      else
+	return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
-      return \"vst1.16\\t{%z1}, %A0\";
+      if (TARGET_HAVE_MVE)
+	return \"vstr.16\\t%1, %0\";
+      else
+	return \"vst1.16\\t{%z1}, %A0\";
     case 9: /* ARM register from constant.  */
       {
	long bits;
diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c
new file mode 100644
index 00000000000..67a9f416adf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c
@@ -0,0 +1,418 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mfloat-abi=hard -mfp16-format=ieee" } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** r_w:
+**	vmov.f16	r0, s0	@ __fp16
+**	bx	lr
+*/
+void
+r_w (_Float16 s0)
+{
+  register _Float16 r0 asm ("r0");
+  r0 = s0;
+  asm volatile ("" :: "r" (r0));
+}
+
+/*
+** w_r:
+**	vmov.f16	s0, r0	@ __fp16
+**	bx	lr
+*/
+_Float16
+w_r ()
+{
+  register _Float16 r0 asm ("r0");
+  asm volatile ("" : "=r" (r0));
+  return r0;
+}
+
+/*
+** w_w:
+**	vmov	s1, s0	@ __fp16
+**	bx	lr
+*/
+void
+w_w (_Float16 s0)
+{
+  register _Float16 s1 asm ("s1");
+  s1 = s0;
+  asm volatile ("" :: "w" (s1));
+}
+
+/*
+** r_m_m128:
+**	sub	(r[0-9]+), r0, #256
+**	ldrh	r1, \[\1\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_m128 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[-128];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m127:
+**	ldrh	r1, \[r0, #-254\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_m127 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[-127];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m1:
+**	ldrh	r1, \[r0, #-2\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_m1 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[-1];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_0:
+**	ldrh	r1, \[r0\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_0 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[0];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_1:
+**	ldrh	r1, \[r0, #2\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_1 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[1];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_255:
+**	ldrh	r1, \[r0, #510\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_255 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[255];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_256:
+**	ldrh	r1, \[r0, #512\]	@ __fp16
+**	bx	lr
+*/
+void
+r_m_256 (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  r1 = r0[256];
+  asm volatile ("" :: "r" (r1));
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+   it makes more sense for memory_operand to enforce the GPR range.  */
+/*
+** w_m_m128:
+**	sub	(r[0-9]+), r0, #256
+**	vldr.16	s0, \[\1\]
+**	bx	lr
+*/
+void
+w_m_m128 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[-128];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m127:
+**	vldr.16	s0, \[r0, #-254\]
+**	bx	lr
+*/
+void
+w_m_m127 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[-127];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m1:
+**	vldr.16	s0, \[r0, #-2\]
+**	bx	lr
+*/
+void
+w_m_m1 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[-1];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_0:
+**	vldr.16	s0, \[r0\]
+**	bx	lr
+*/
+void
+w_m_0 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[0];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_1:
+**	vldr.16	s0, \[r0, #2\]
+**	bx	lr
+*/
+void
+w_m_1 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[1];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_255:
+**	vldr.16	s0, \[r0, #510\]
+**	bx	lr
+*/
+void
+w_m_255 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[255];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_256:
+**	add	(r[0-9]+), r0, #512
+**	vldr.16	s0, \[\1\]
+**	bx	lr
+*/
+void
+w_m_256 (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  s0 = r0[256];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** m_m128_r:
+**	sub	(r[0-9]+), r0, #256
+**	strh	r1, \[\1\]	@ __fp16
+**	bx	lr
+*/
+void
+m_m128_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[-128] = r1;
+}
+
+/*
+** m_m127_r:
+**	strh	r1, \[r0, #-254\]	@ __fp16
+**	bx	lr
+*/
+void
+m_m127_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[-127] = r1;
+}
+
+/*
+** m_m1_r:
+**	strh	r1, \[r0, #-2\]	@ __fp16
+**	bx	lr
+*/
+void
+m_m1_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[-1] = r1;
+}
+
+/*
+** m_0_r:
+**	strh	r1, \[r0\]	@ __fp16
+**	bx	lr
+*/
+void
+m_0_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[0] = r1;
+}
+
+/*
+** m_1_r:
+**	strh	r1, \[r0, #2\]	@ __fp16
+**	bx	lr
+*/
+void
+m_1_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[1] = r1;
+}
+
+/*
+** m_255_r:
+**	strh	r1, \[r0, #510\]	@ __fp16
+**	bx	lr
+*/
+void
+m_255_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[255] = r1;
+}
+
+/*
+** m_256_r:
+**	strh	r1, \[r0, #512\]	@ __fp16
+**	bx	lr
+*/
+void
+m_256_r (_Float16 *r0)
+{
+  register _Float16 r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[256] = r1;
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+   it makes more sense for memory_operand to enforce the GPR range.  */
+/*
+** m_m128_w:
+**	sub	(r[0-9]+), r0, #256
+**	vstr.16	s0, \[\1\]
+**	bx	lr
+*/
+void
+m_m128_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[-128] = s0;
+}
+
+/*
+** m_m127_w:
+**	vstr.16	s0, \[r0, #-254\]
+**	bx	lr
+*/
+void
+m_m127_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[-127] = s0;
+}
+
+/*
+** m_m1_w:
+**	vstr.16	s0, \[r0, #-2\]
+**	bx	lr
+*/
+void
+m_m1_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[-1] = s0;
+}
+
+/*
+** m_0_w:
+**	vstr.16	s0, \[r0\]
+**	bx	lr
+*/
+void
+m_0_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[0] = s0;
+}
+
+/*
+** m_1_w:
+**	vstr.16	s0, \[r0, #2\]
+**	bx	lr
+*/
+void
+m_1_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[1] = s0;
+}
+
+/*
+** m_255_w:
+**	vstr.16	s0, \[r0, #510\]
+**	bx	lr
+*/
+void
+m_255_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[255] = s0;
+}
+
+/*
+** m_256_w:
+**	add	(r[0-9]+), r0, #512
+**	vstr.16	s0, \[\1\]
+**	bx	lr
+*/
+void
+m_256_w (_Float16 *r0)
+{
+  register _Float16 s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[256] = s0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c
new file mode 100644
index 00000000000..1ecb839bfe7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c
@@ -0,0 +1,420 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mfloat-abi=hard" } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** r_w:
+**	vmov	r0, s0
+**	bx	lr
+*/
+void
+r_w (float s0)
+{
+  register float r0 asm ("r0");
+  r0 = s0;
+  asm volatile ("" :: "r" (r0));
+}
+
+/*
+** w_r:
+**	vmov	s0, r0
+**	bx	lr
+*/
+float
+w_r ()
+{
+  register float r0 asm ("r0");
+  asm volatile ("" : "=r" (r0));
+  return r0;
+}
+
+/*
+** w_w:
+**	vmov.f32	s1, s0
+**	bx	lr
+*/
+void
+w_w (float s0)
+{
+  register float s1 asm ("s1");
+  s1 = s0;
+  asm volatile ("" :: "w" (s1));
+}
+
+/*
+** r_m_m64:
+**	sub	(r[0-9]+), r0, #256
+**	ldr	r1, \[\1\]	@ float
+**	bx	lr
+*/
+void
+r_m_m64 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[-64];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m63:
+**	ldr	r1, \[r0, #-252\]	@ float
+**	bx	lr
+*/
+void
+r_m_m63 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[-63];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m1:
+**	ldr	r1, \[r0, #-4\]	@ float
+**	bx	lr
+*/
+void
+r_m_m1 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[-1];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_0:
+**	ldr	r1, \[r0\]	@ float
+**	bx	lr
+*/
+void
+r_m_0 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[0];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_1:
+**	ldr	r1, \[r0, #4\]	@ float
+**	bx	lr
+*/
+void
+r_m_1 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[1];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_255:
+**	ldr	r1, \[r0, #1020\]	@ float
+**	bx	lr
+*/
+void
+r_m_255 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[255];
+  asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_256:
+**	add	(r[0-9]+), r0, #1024
+**	ldr	r1, \[\1\]	@ float
+**	bx	lr
+*/
+void
+r_m_256 (float *r0)
+{
+  register float r1 asm ("r1");
+  r1 = r0[256];
+  asm volatile ("" :: "r" (r1));
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+   it makes more sense for memory_operand to enforce the GPR range.  */
+/*
+** w_m_m64:
+**	sub	(r[0-9]+), r0, #256
+**	vldr.32	s0, \[\1\]
+**	bx	lr
+*/
+void
+w_m_m64 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[-64];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m63:
+**	vldr.32	s0, \[r0, #-252\]
+**	bx	lr
+*/
+void
+w_m_m63 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[-63];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m1:
+**	vldr.32	s0, \[r0, #-4\]
+**	bx	lr
+*/
+void
+w_m_m1 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[-1];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_0:
+**	vldr.32	s0, \[r0\]
+**	bx	lr
+*/
+void
+w_m_0 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[0];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_1:
+**	vldr.32	s0, \[r0, #4\]
+**	bx	lr
+*/
+void
+w_m_1 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[1];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_255:
+**	vldr.32	s0, \[r0, #1020\]
+**	bx	lr
+*/
+void
+w_m_255 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[255];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_256:
+**	add	(r[0-9]+), r0, #1024
+**	vldr.32	s0, \[\1\]
+**	bx	lr
+*/
+void
+w_m_256 (float *r0)
+{
+  register float s0 asm ("s0");
+  s0 = r0[256];
+  asm volatile ("" :: "w" (s0));
+}
+
+/*
+** m_m64_r:
+**	sub	(r[0-9]+), r0, #256
+**	str	r1, \[\1\]	@ float
+**	bx	lr
+*/
+void
+m_m64_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[-64] = r1;
+}
+
+/*
+** m_m63_r:
+**	str	r1, \[r0, #-252\]	@ float
+**	bx	lr
+*/
+void
+m_m63_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[-63] = r1;
+}
+
+/*
+** m_m1_r:
+**	str	r1, \[r0, #-4\]	@ float
+**	bx	lr
+*/
+void
+m_m1_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[-1] = r1;
+}
+
+/*
+** m_0_r:
+**	str	r1, \[r0\]	@ float
+**	bx	lr
+*/
+void
+m_0_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[0] = r1;
+}
+
+/*
+** m_1_r:
+**	str	r1, \[r0, #4\]	@ float
+**	bx	lr
+*/
+void
+m_1_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[1] = r1;
+}
+
+/*
+** m_255_r:
+**	str	r1, \[r0, #1020\]	@ float
+**	bx	lr
+*/
+void
+m_255_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[255] = r1;
+}
+
+/*
+** m_256_r:
+**	add	(r[0-9]+), r0, #1024
+**	str	r1, \[\1\]	@ float
+**	bx	lr
+*/
+void
+m_256_r (float *r0)
+{
+  register float r1 asm ("r1");
+  asm volatile ("" : "=r" (r1));
+  r0[256] = r1;
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+   it makes more sense for memory_operand to enforce the GPR range.  */
+/*
+** m_m64_w:
+**	sub	(r[0-9]+), r0, #256
+**	vstr.32	s0, \[\1\]
+**	bx	lr
+*/
+void
+m_m64_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[-64] = s0;
+}
+
+/*
+** m_m63_w:
+**	vstr.32	s0, \[r0, #-252\]
+**	bx	lr
+*/
+void
+m_m63_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[-63] = s0;
+}
+
+/*
+** m_m1_w:
+**	vstr.32	s0, \[r0, #-4\]
+**	bx	lr
+*/
+void
+m_m1_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[-1] = s0;
+}
+
+/*
+** m_0_w:
+**	vstr.32	s0, \[r0\]
+**	bx	lr
+*/
+void
+m_0_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[0] = s0;
+}
+
+/*
+** m_1_w:
+**	vstr.32	s0, \[r0, #4\]
+**	bx	lr
+*/
+void
+m_1_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[1] = s0;
+}
+
+/*
+** m_255_w:
+**	vstr.32	s0, \[r0, #1020\]
+**	bx	lr
+*/
+void
+m_255_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[255] = s0;
+}
+
+/*
+** m_256_w:
+**	add	(r[0-9]+), r0, #1024
+**	vstr.32	s0, \[\1\]
+**	bx	lr
+*/
+void
+m_256_w (float *r0)
+{
+  register float s0 asm ("s0");
+  asm volatile ("" : "=w" (s0));
+  r0[256] = s0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c
new file mode 100644
index 00000000000..3f81350697a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c
@@ -0,0 +1,426 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mfloat-abi=hard" } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** r_w:
+**	vmov	r0, r1, d0
+**	bx	lr
+*/
+void
+r_w (double d0)
+{
+  register double r0 asm ("r0");
+  r0 = d0;
+  asm volatile ("" :: "r" (r0));
+}
+
+/*
+** w_r:
+**	vmov	d0, r0, r1
+**	bx	lr
+*/
+double
+w_r ()
+{
+  register double r0 asm ("r0");
+  asm volatile ("" : "=r" (r0));
+  return r0;
+}
+
+/*
+** w_w:
+** (
+**	vmov.f32	s2, s0
+**	vmov.f32	s3, s1
+** |
+**	vmov.f32	s3, s1
+**	vmov.f32	s2, s0
+** )
+**	bx	lr
+*/
+void
+w_w (double d0)
+{
+  register double d1 asm ("d1");
+  d1 = d0;
+  asm volatile ("" :: "w" (d1));
+}
+
+/*
+** r_m_m32:
+**	sub	(r[0-9]+), r0, #256
+**	ldrd	r2, \[\1\]
+**	bx	lr
+*/
+void
+r_m_m32 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[-32];
+  asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_m31:
+**	ldrd	r2, \[r0, #-248\]
+**	bx	lr
+*/
+void
+r_m_m31 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[-31];
+  asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_m1:
+**	ldrd	r2, \[r0, #-8\]
+**	bx	lr
+*/
+void
+r_m_m1 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[-1];
+  asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_0:
+**	ldrd	r2, \[r0\]
+**	bx	lr
+*/
+void
+r_m_0 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[0];
+  asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_1:
+**	ldrd	r2, \[r0, #8\]
+**	bx	lr
+*/
+void
+r_m_1 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[1];
+  asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_127:
+**	ldrd	r2, \[r0, #1016\]
+**	bx	lr
+*/
+void
+r_m_127 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[127];
+  asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_128:
+**	add	(r[0-9]+), r0, #1024
+**	ldrd	r2, \[\1\]
+**	bx	lr
+*/
+void
+r_m_128 (double *r0)
+{
+  register double r2 asm ("r2");
+  r2 = r0[128];
+  asm volatile ("" :: "r" (r2));
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+   it makes more sense for memory_operand to enforce the GPR range.  */
+/*
+** w_m_m32:
+**	sub	(r[0-9]+), r0, #256
+**	vldr.64	d0, \[\1\]
+**	bx	lr
+*/
+void
+w_m_m32 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[-32];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_m31:
+**	vldr.64	d0, \[r0, #-248\]
+**	bx	lr
+*/
+void
+w_m_m31 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[-31];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_m1:
+**	vldr.64	d0, \[r0, #-8\]
+**	bx	lr
+*/
+void
+w_m_m1 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[-1];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_0:
+**	vldr.64	d0, \[r0\]
+**	bx	lr
+*/
+void
+w_m_0 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[0];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_1:
+**	vldr.64	d0, \[r0, #8\]
+**	bx	lr
+*/
+void
+w_m_1 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[1];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_127:
+**	vldr.64	d0, \[r0, #1016\]
+**	bx	lr
+*/
+void
+w_m_127 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[127];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_128:
+**	add	(r[0-9]+), r0, #1024
+**	vldr.64	d0, \[\1\]
+**	bx	lr
+*/
+void
+w_m_128 (double *r0)
+{
+  register double d0 asm ("d0");
+  d0 = r0[128];
+  asm volatile ("" :: "w" (d0));
+}
+
+/*
+** m_m32_r:
+**	sub	(r[0-9]+), r0, #256
+**	strd	r2, \[\1\]
+**	bx	lr
+*/
+void
+m_m32_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[-32] = r2;
+}
+
+/*
+** m_m31_r:
+**	strd	r2, \[r0, #-248\]
+**	bx	lr
+*/
+void
+m_m31_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[-31] = r2;
+}
+
+/*
+** m_m1_r:
+**	strd	r2, \[r0, #-8\]
+**	bx	lr
+*/
+void
+m_m1_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[-1] = r2;
+}
+
+/*
+** m_0_r:
+**	strd	r2, \[r0\]
+**	bx	lr
+*/
+void
+m_0_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[0] = r2;
+}
+
+/*
+** m_1_r:
+**	strd	r2, \[r0, #8\]
+**	bx	lr
+*/
+void
+m_1_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[1] = r2;
+}
+
+/*
+** m_127_r:
+**	strd	r2, \[r0, #1016\]
+**	bx	lr
+*/
+void
+m_127_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[127] = r2;
+}
+
+/*
+** m_128_r:
+**	add	(r[0-9]+), r0, #1024
+**	strd	r2, \[\1\]
+**	bx	lr
+*/
+void
+m_128_r (double *r0)
+{
+  register double r2 asm ("r2");
+  asm volatile ("" : "=r" (r2));
+  r0[128] = r2;
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+   it makes more sense for memory_operand to enforce the GPR range.  */
+/*
+** m_m32_w:
+**	sub	(r[0-9]+), r0, #256
+**	vstr.64	d0, \[\1\]
+**	bx	lr
+*/
+void
+m_m32_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[-32] = d0;
+}
+
+/*
+** m_m31_w:
+**	vstr.64	d0, \[r0, #-248\]
+**	bx	lr
+*/
+void
+m_m31_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[-31] = d0;
+}
+
+/*
+** m_m1_w:
+**	vstr.64	d0, \[r0, #-8\]
+**	bx	lr
+*/
+void
+m_m1_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[-1] = d0;
+}
+
+/*
+** m_0_w:
+**	vstr.64	d0, \[r0\]
+**	bx	lr
+*/
+void
+m_0_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[0] = d0;
+}
+
+/*
+** m_1_w:
+**	vstr.64	d0, \[r0, #8\]
+**	bx	lr
+*/
+void
+m_1_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[1] = d0;
+}
+
+/*
+** m_127_w:
+**	vstr.64	d0, \[r0, #1016\]
+**	bx	lr
+*/
+void
+m_127_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[127] = d0;
+}
+
+/*
+** m_128_w:
+**	add	(r[0-9]+), r0, #1024
+**	vstr.64	d0, \[\1\]
+**	bx	lr
+*/
+void
+m_128_w (double *r0)
+{
+  register double d0 asm ("d0");
+  asm volatile ("" : "=w" (d0));
+  r0[128] = d0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c
index 0a69aced8b4..50b195300d8 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c
@@ -13,5 +13,6 @@ fn1 (__fp16 *pSrc)
     pDst[i] = high;
 }
 
-/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+(, #-?[0-9]+)?\]\n} } } */
+/* { dg-final { scan-assembler-not {vldr\.16\t[^\n]*\]!} } } */
+/* { dg-final { scan-assembler-not {vstr\.16\t[^\n]*\]!} } } */