arm: Fix fp16 move patterns for base MVE

This patch fixes ICEs in gcc.dg/torture/float16-basic.c for
-march=armv8.1-m.main+mve -mfloat-abi=hard.  The problem was
that an fp16 argument was (rightly) being passed in FPRs,
but the fp16 move patterns only handled GPRs.  LRA then cycled,
trying to find a way of handling the FPR.

It looks like there are three related problems here:

(1) We're using the wrong fp16 move pattern for base MVE.
    *mov<mode>_vfp_<mode>16 (the pattern we use for +mve.fp)
    works for base MVE too.

(2) The fp16 MVE load and store patterns are separate from the
    main move patterns.  The loads and stores should instead be
    alternatives of the main move patterns, so that LRA knows
    what to do with pseudo registers that become stack slots.

(3) The range restrictions for the loads and stores were wrong
    for fp16: we were enforcing a multiple of 4 in [-255*4, 255*4]
    instead of a multiple of 2 in [-255*2, 255*2].

(2) came from a patch to prevent writeback being used for MVE.
That patch also added a Uj constraint to enforce the correct
memory types for MVE.  I think the simplest fix is therefore to merge
the loads and stores back into the main pattern and extend the Uj
constraint so that it acts like Um for non-MVE.

The testcase for that patch was mve-vldstr16-no-writeback.c, whose
main function is:

void
fn1 (__fp16 *pSrc)
{
  __fp16 high;
  __fp16 *pDst = 0;
  unsigned i;
  for (i = 0;; i++)
    if (pSrc[i])
      pDst[i] = high;
}

Fixing (2) causes the store part to fail, not because we're using
writeback, but because we decide to use GPRs to store high (which is
uninitialised, and so gets replaced with zero).  This patch therefore
adds some scan-assembler-nots instead.  (I wondered about changing the
testcase to initialise high, but that seemed like a bad idea for
a regression test.)

For (3): MVE seems to be the only thing to use arm_coproc_mem_operand_wb
(and its various interfaces) for 16-bit scalars: the Neon patterns only
use it for 32-bit scalars.
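
To make the fix for (3) concrete, here is a minimal standalone sketch
of the corrected check; the helper name and its mode_size parameter
are illustrative only, and the real logic lives in
arm_coproc_mem_operand_wb (see the arm.c hunk below):

#include <stdbool.h>

/* Sketch: vldr/vstr scale the encoded 8-bit immediate by the access
   size, capped at 4 bytes, so "factor" is 2 for 16-bit modes and 4
   for 32-bit and 64-bit modes.  */
static bool
offset_ok_for_vldr_vstr (int mode_size, long offset)
{
  int factor = mode_size < 4 ? mode_size : 4;
  return (offset >= -255 * factor
          && offset <= 255 * factor
          && (offset & (factor - 1)) == 0);
}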

I've added new tests to try the various FPR alternatives of the
move patterns.  The range of offsets that GCC uses for FPR loads
and stores is the intersection of the range allowed for GPRs and
FPRs, so the tests include GPR<->memory tests as well.
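
As a concrete fp16 example of that intersection (assuming the usual
Thumb-2 encodings: ldrh/strh accept immediate offsets in [-255, 4095],
while vldr.16/vstr.16 accept even offsets in [-510, 510]), here is a
hypothetical helper that mirrors what the boundary tests probe:

static int
fp16_offset_usable (long offset)
{
  int gpr_ok = offset >= -255 && offset <= 4095;
  int fpr_ok = offset >= -510 && offset <= 510 && (offset & 1) == 0;
  /* -254 and 510 pass; -256 and 512 instead need the base address
     to be adjusted by a separate add/sub.  */
  return gpr_ok && fpr_ok;
}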

The fp32 and fp64 tests already pass; they're just there for
completeness.

gcc/
	* config/arm/arm-protos.h (arm_mve_mode_and_operands_type_check):
	Delete.
	* config/arm/arm.c (arm_coproc_mem_operand_wb): Use a scale factor
	of 2 rather than 4 for 16-bit modes.
	(arm_mve_mode_and_operands_type_check): Delete.
	* config/arm/constraints.md (Uj): Allow writeback for Neon,
	but continue to disallow it for MVE.
	* config/arm/arm.md (*arm32_mov<HFBF:mode>): Add !TARGET_HAVE_MVE.
	* config/arm/vfp.md (*mov_load_vfp_hf16, *mov_store_vfp_hf16): Fold
	back into...
	(*mov<mode>_vfp_<mode>16): ...here but use Uj for the FPR memory
	constraints.  Use for base MVE too.

gcc/testsuite/
	* gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: Allow
	the store to use GPRs instead of FPRs.  Add scan-assembler-nots
	for writeback.
	* gcc.target/arm/armv8_1m-fp16-move-1.c: New test.
	* gcc.target/arm/armv8_1m-fp32-move-1.c: Likewise.
	* gcc.target/arm/armv8_1m-fp64-move-1.c: Likewise.
commit 6abd428605
parent 4dcc7f03b5
Author: Richard Sandiford
Date:   2020-09-25 12:45:25 +01:00

9 changed files with 1295 additions and 45 deletions

gcc/config/arm/arm-protos.h

@@ -120,7 +120,6 @@ extern int arm_coproc_mem_operand_no_writeback (rtx);
 extern int arm_coproc_mem_operand_wb (rtx, int);
 extern int neon_vector_mem_operand (rtx, int, bool);
 extern int mve_vector_mem_operand (machine_mode, rtx, bool);
-bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);
 extern int neon_struct_mem_operand (rtx);
 extern rtx *neon_vcmla_lane_prepare_operands (rtx *);

gcc/config/arm/arm.c

@@ -13277,14 +13277,18 @@ arm_coproc_mem_operand_wb (rtx op, int wb_level)
   /* Match:
      (plus (reg)
-	   (const)).  */
+	   (const))
+
+     The encoded immediate for 16-bit modes is multiplied by 2,
+     while the encoded immediate for 32-bit and 64-bit modes is
+     multiplied by 4.  */
+  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
   if (GET_CODE (ind) == PLUS
       && REG_P (XEXP (ind, 0))
       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
       && CONST_INT_P (XEXP (ind, 1))
-      && INTVAL (XEXP (ind, 1)) > -1024
-      && INTVAL (XEXP (ind, 1)) < 1024
-      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
+      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
     return TRUE;
 
   return FALSE;
@@ -33578,17 +33582,4 @@ arm_mode_base_reg_class (machine_mode mode)
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
-bool
-arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1)
-{
-  if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))
-    return true;
-  else if (mode == E_BFmode)
-    return false;
-  else if ((s_register_operand (op0, mode) && MEM_P (op1))
-	   || (s_register_operand (op1, mode) && MEM_P (op0)))
-    return false;
-
-  return true;
-}
 #include "gt-arm.h"

gcc/config/arm/arm.md

@@ -7289,7 +7289,9 @@
 (define_insn "*arm32_mov<mode>"
   [(set (match_operand:HFBF 0 "nonimmediate_operand" "=r,m,r,r")
 	(match_operand:HFBF 1 "general_operand" " m,r,r,F"))]
-  "TARGET_32BIT && !TARGET_HARD_FLOAT
+  "TARGET_32BIT
+   && !TARGET_HARD_FLOAT
+   && !TARGET_HAVE_MVE
    && (	  s_register_operand (operands[0], <MODE>mode)
        || s_register_operand (operands[1], <MODE>mode))"
   "*

gcc/config/arm/constraints.md

@@ -454,10 +454,13 @@
 (define_memory_constraint "Uj"
  "@internal
-  In ARM/Thumb-2 state an VFP load/store address which does not support
-  writeback at all (eg vldr.16)."
+  In ARM/Thumb-2 state a VFP load/store address that supports writeback
+  for Neon but not for MVE"
  (and (match_code "mem")
-      (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)")))
+      (match_test "TARGET_32BIT")
+      (match_test "TARGET_HAVE_MVE
+		   ? arm_coproc_mem_operand_no_writeback (op)
+		   : neon_vector_mem_operand (op, 2, true)")))
 
 (define_memory_constraint "Uy"
  "@internal

gcc/config/arm/vfp.md

@@ -387,31 +387,15 @@
    (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")]
 )
 
-(define_insn "*mov_load_vfp_hf16"
-  [(set (match_operand:HF 0 "s_register_operand" "=t")
-	(match_operand:HF 1 "memory_operand" "Uj"))]
-  "TARGET_HAVE_MVE_FLOAT"
-  "vldr.16\\t%0, %E1"
-)
-
-(define_insn "*mov_store_vfp_hf16"
-  [(set (match_operand:HF 0 "memory_operand" "=Uj")
-	(match_operand:HF 1 "s_register_operand" "t"))]
-  "TARGET_HAVE_MVE_FLOAT"
-  "vstr.16\\t%1, %E0"
-)
-
 ;; HFmode and BFmode moves
 (define_insn "*mov<mode>_vfp_<mode>16"
   [(set (match_operand:HFBF 0 "nonimmediate_operand"
-			  "= ?r,?m,t,r,t,r,t, t, Um,r")
+			  "= ?r,?m,t,r,t,r,t, t, Uj,r")
 	(match_operand:HFBF 1 "general_operand"
-			  "  m,r,t,r,r,t,Dv,Um,t, F"))]
+			  "  m,r,t,r,r,t,Dv,Uj,t, F"))]
   "TARGET_32BIT
-   && TARGET_VFP_FP16INST
-   && arm_mve_mode_and_operands_type_check (<MODE>mode, operands[0],
-					    operands[1])
+   && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
    && (s_register_operand (operands[0], <MODE>mode)
        || s_register_operand (operands[1], <MODE>mode))"
   {
@@ -430,9 +414,15 @@
     case 6: /* S register from immediate.  */
       return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";
     case 7: /* S register from memory.  */
-      return \"vld1.16\\t{%z0}, %A1\";
+      if (TARGET_HAVE_MVE)
+	return \"vldr.16\\t%0, %1\";
+      else
+	return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
-      return \"vst1.16\\t{%z1}, %A0\";
+      if (TARGET_HAVE_MVE)
+	return \"vstr.16\\t%1, %0\";
+      else
+	return \"vst1.16\\t{%z1}, %A0\";
     case 9: /* ARM register from constant.  */
       {
	long bits;

gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c

@@ -0,0 +1,418 @@
/* { dg-do compile } */
/* { dg-options "-O -mfloat-abi=hard -mfp16-format=ieee" } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-final { check-function-bodies "**" "" } } */

/*
** r_w:
** vmov.f16 r0, s0 @ __fp16
** bx lr
*/
void
r_w (_Float16 s0)
{
  register _Float16 r0 asm ("r0");
  r0 = s0;
  asm volatile ("" :: "r" (r0));
}

/*
** w_r:
** vmov.f16 s0, r0 @ __fp16
** bx lr
*/
_Float16
w_r ()
{
  register _Float16 r0 asm ("r0");
  asm volatile ("" : "=r" (r0));
  return r0;
}

/*
** w_w:
** vmov s1, s0 @ __fp16
** bx lr
*/
void
w_w (_Float16 s0)
{
  register _Float16 s1 asm ("s1");
  s1 = s0;
  asm volatile ("" :: "w" (s1));
}

/*
** r_m_m128:
** sub (r[0-9]+), r0, #256
** ldrh r1, \[\1\] @ __fp16
** bx lr
*/
void
r_m_m128 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[-128];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_m127:
** ldrh r1, \[r0, #-254\] @ __fp16
** bx lr
*/
void
r_m_m127 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[-127];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_m1:
** ldrh r1, \[r0, #-2\] @ __fp16
** bx lr
*/
void
r_m_m1 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[-1];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_0:
** ldrh r1, \[r0\] @ __fp16
** bx lr
*/
void
r_m_0 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[0];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_1:
** ldrh r1, \[r0, #2\] @ __fp16
** bx lr
*/
void
r_m_1 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[1];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_255:
** ldrh r1, \[r0, #510\] @ __fp16
** bx lr
*/
void
r_m_255 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[255];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_256:
** ldrh r1, \[r0, #512\] @ __fp16
** bx lr
*/
void
r_m_256 (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  r1 = r0[256];
  asm volatile ("" :: "r" (r1));
}

/* ??? This could be done in one instruction, but without mve.fp,
   it makes more sense for memory_operand to enforce the GPR range.  */
/*
** w_m_m128:
** sub (r[0-9]+), r0, #256
** vldr.16 s0, \[\1\]
** bx lr
*/
void
w_m_m128 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[-128];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_m127:
** vldr.16 s0, \[r0, #-254\]
** bx lr
*/
void
w_m_m127 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[-127];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_m1:
** vldr.16 s0, \[r0, #-2\]
** bx lr
*/
void
w_m_m1 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[-1];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_0:
** vldr.16 s0, \[r0\]
** bx lr
*/
void
w_m_0 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[0];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_1:
** vldr.16 s0, \[r0, #2\]
** bx lr
*/
void
w_m_1 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[1];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_255:
** vldr.16 s0, \[r0, #510\]
** bx lr
*/
void
w_m_255 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[255];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_256:
** add (r[0-9]+), r0, #512
** vldr.16 s0, \[\1\]
** bx lr
*/
void
w_m_256 (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  s0 = r0[256];
  asm volatile ("" :: "w" (s0));
}

/*
** m_m128_r:
** sub (r[0-9]+), r0, #256
** strh r1, \[\1\] @ __fp16
** bx lr
*/
void
m_m128_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[-128] = r1;
}

/*
** m_m127_r:
** strh r1, \[r0, #-254\] @ __fp16
** bx lr
*/
void
m_m127_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[-127] = r1;
}

/*
** m_m1_r:
** strh r1, \[r0, #-2\] @ __fp16
** bx lr
*/
void
m_m1_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[-1] = r1;
}

/*
** m_0_r:
** strh r1, \[r0\] @ __fp16
** bx lr
*/
void
m_0_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[0] = r1;
}

/*
** m_1_r:
** strh r1, \[r0, #2\] @ __fp16
** bx lr
*/
void
m_1_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[1] = r1;
}

/*
** m_255_r:
** strh r1, \[r0, #510\] @ __fp16
** bx lr
*/
void
m_255_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[255] = r1;
}

/*
** m_256_r:
** strh r1, \[r0, #512\] @ __fp16
** bx lr
*/
void
m_256_r (_Float16 *r0)
{
  register _Float16 r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[256] = r1;
}

/* ??? This could be done in one instruction, but without mve.fp,
   it makes more sense for memory_operand to enforce the GPR range.  */
/*
** m_m128_w:
** sub (r[0-9]+), r0, #256
** vstr.16 s0, \[\1\]
** bx lr
*/
void
m_m128_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[-128] = s0;
}

/*
** m_m127_w:
** vstr.16 s0, \[r0, #-254\]
** bx lr
*/
void
m_m127_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[-127] = s0;
}

/*
** m_m1_w:
** vstr.16 s0, \[r0, #-2\]
** bx lr
*/
void
m_m1_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[-1] = s0;
}

/*
** m_0_w:
** vstr.16 s0, \[r0\]
** bx lr
*/
void
m_0_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[0] = s0;
}

/*
** m_1_w:
** vstr.16 s0, \[r0, #2\]
** bx lr
*/
void
m_1_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[1] = s0;
}

/*
** m_255_w:
** vstr.16 s0, \[r0, #510\]
** bx lr
*/
void
m_255_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[255] = s0;
}

/*
** m_256_w:
** add (r[0-9]+), r0, #512
** vstr.16 s0, \[\1\]
** bx lr
*/
void
m_256_w (_Float16 *r0)
{
  register _Float16 s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[256] = s0;
}

gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c

@@ -0,0 +1,420 @@
/* { dg-do compile } */
/* { dg-options "-O -mfloat-abi=hard" } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-final { check-function-bodies "**" "" } } */

/*
** r_w:
** vmov r0, s0
** bx lr
*/
void
r_w (float s0)
{
  register float r0 asm ("r0");
  r0 = s0;
  asm volatile ("" :: "r" (r0));
}

/*
** w_r:
** vmov s0, r0
** bx lr
*/
float
w_r ()
{
  register float r0 asm ("r0");
  asm volatile ("" : "=r" (r0));
  return r0;
}

/*
** w_w:
** vmov.f32 s1, s0
** bx lr
*/
void
w_w (float s0)
{
  register float s1 asm ("s1");
  s1 = s0;
  asm volatile ("" :: "w" (s1));
}

/*
** r_m_m64:
** sub (r[0-9]+), r0, #256
** ldr r1, \[\1\] @ float
** bx lr
*/
void
r_m_m64 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[-64];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_m63:
** ldr r1, \[r0, #-252\] @ float
** bx lr
*/
void
r_m_m63 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[-63];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_m1:
** ldr r1, \[r0, #-4\] @ float
** bx lr
*/
void
r_m_m1 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[-1];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_0:
** ldr r1, \[r0\] @ float
** bx lr
*/
void
r_m_0 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[0];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_1:
** ldr r1, \[r0, #4\] @ float
** bx lr
*/
void
r_m_1 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[1];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_255:
** ldr r1, \[r0, #1020\] @ float
** bx lr
*/
void
r_m_255 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[255];
  asm volatile ("" :: "r" (r1));
}

/*
** r_m_256:
** add (r[0-9]+), r0, #1024
** ldr r1, \[r0\] @ float
** bx lr
*/
void
r_m_256 (float *r0)
{
  register float r1 asm ("r1");
  r1 = r0[256];
  asm volatile ("" :: "r" (r1));
}

/* ??? This could be done in one instruction, but without mve.fp,
   it makes more sense for memory_operand to enforce the GPR range.  */
/*
** w_m_m64:
** sub (r[0-9]+), r0, #256
** vldr.32 s0, \[\1\]
** bx lr
*/
void
w_m_m64 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[-64];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_m63:
** vldr.32 s0, \[r0, #-252\]
** bx lr
*/
void
w_m_m63 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[-63];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_m1:
** vldr.32 s0, \[r0, #-4\]
** bx lr
*/
void
w_m_m1 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[-1];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_0:
** vldr.32 s0, \[r0\]
** bx lr
*/
void
w_m_0 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[0];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_1:
** vldr.32 s0, \[r0, #4\]
** bx lr
*/
void
w_m_1 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[1];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_255:
** vldr.32 s0, \[r0, #1020\]
** bx lr
*/
void
w_m_255 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[255];
  asm volatile ("" :: "w" (s0));
}

/*
** w_m_256:
** add (r[0-9]+), r0, #1024
** vldr.32 s0, \[\1\]
** bx lr
*/
void
w_m_256 (float *r0)
{
  register float s0 asm ("s0");
  s0 = r0[256];
  asm volatile ("" :: "w" (s0));
}

/*
** m_m64_r:
** sub (r[0-9]+), r0, #256
** str r1, \[\1\] @ float
** bx lr
*/
void
m_m64_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[-64] = r1;
}

/*
** m_m63_r:
** str r1, \[r0, #-252\] @ float
** bx lr
*/
void
m_m63_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[-63] = r1;
}

/*
** m_m1_r:
** str r1, \[r0, #-4\] @ float
** bx lr
*/
void
m_m1_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[-1] = r1;
}

/*
** m_0_r:
** str r1, \[r0\] @ float
** bx lr
*/
void
m_0_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[0] = r1;
}

/*
** m_1_r:
** str r1, \[r0, #4\] @ float
** bx lr
*/
void
m_1_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[1] = r1;
}

/*
** m_255_r:
** str r1, \[r0, #1020\] @ float
** bx lr
*/
void
m_255_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[255] = r1;
}

/*
** m_256_r:
** add (r[0-9]+), r0, #1024
** str r1, \[r0\] @ float
** bx lr
*/
void
m_256_r (float *r0)
{
  register float r1 asm ("r1");
  asm volatile ("" : "=r" (r1));
  r0[256] = r1;
}

/* ??? This could be done in one instruction, but without mve.fp,
   it makes more sense for memory_operand to enforce the GPR range.  */
/*
** m_m64_w:
** sub (r[0-9]+), r0, #256
** vstr.32 s0, \[\1\]
** bx lr
*/
void
m_m64_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[-64] = s0;
}

/*
** m_m63_w:
** vstr.32 s0, \[r0, #-252\]
** bx lr
*/
void
m_m63_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[-63] = s0;
}

/*
** m_m1_w:
** vstr.32 s0, \[r0, #-4\]
** bx lr
*/
void
m_m1_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[-1] = s0;
}

/*
** m_0_w:
** vstr.32 s0, \[r0\]
** bx lr
*/
void
m_0_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[0] = s0;
}

/*
** m_1_w:
** vstr.32 s0, \[r0, #4\]
** bx lr
*/
void
m_1_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[1] = s0;
}

/*
** m_255_w:
** vstr.32 s0, \[r0, #1020\]
** bx lr
*/
void
m_255_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[255] = s0;
}

/*
** m_256_w:
** add (r[0-9]+), r0, #1024
** vstr.32 s0, \[\1\]
** bx lr
*/
void
m_256_w (float *r0)
{
  register float s0 asm ("s0");
  asm volatile ("" : "=w" (s0));
  r0[256] = s0;
}

gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c

@@ -0,0 +1,426 @@
/* { dg-do compile } */
/* { dg-options "-O -mfloat-abi=hard" } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-final { check-function-bodies "**" "" } } */

/*
** r_w:
** vmov r0, r1, d0
** bx lr
*/
void
r_w (double d0)
{
  register double r0 asm ("r0");
  r0 = d0;
  asm volatile ("" :: "r" (r0));
}

/*
** w_r:
** vmov d0, r0, r1
** bx lr
*/
double
w_r ()
{
  register double r0 asm ("r0");
  asm volatile ("" : "=r" (r0));
  return r0;
}

/*
** w_w:
** (
** vmov.f32 s2, s0
** vmov.f32 s3, s1
** |
** vmov.f32 s3, s1
** vmov.f32 s2, s0
** )
** bx lr
*/
void
w_w (double d0)
{
  register double d1 asm ("d1");
  d1 = d0;
  asm volatile ("" :: "w" (d1));
}

/*
** r_m_m32:
** sub (r[0-9]+), r0, #256
** ldrd r2, \[\1\]
** bx lr
*/
void
r_m_m32 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[-32];
  asm volatile ("" :: "r" (r2));
}

/*
** r_m_m31:
** ldrd r2, \[r0, #-248\]
** bx lr
*/
void
r_m_m31 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[-31];
  asm volatile ("" :: "r" (r2));
}

/*
** r_m_m1:
** ldrd r2, \[r0, #-8\]
** bx lr
*/
void
r_m_m1 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[-1];
  asm volatile ("" :: "r" (r2));
}

/*
** r_m_0:
** ldrd r2, \[r0\]
** bx lr
*/
void
r_m_0 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[0];
  asm volatile ("" :: "r" (r2));
}

/*
** r_m_1:
** ldrd r2, \[r0, #8\]
** bx lr
*/
void
r_m_1 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[1];
  asm volatile ("" :: "r" (r2));
}

/*
** r_m_127:
** ldrd r2, \[r0, #1016\]
** bx lr
*/
void
r_m_127 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[127];
  asm volatile ("" :: "r" (r2));
}

/*
** r_m_128:
** add (r[0-9]+), r0, #1024
** ldrd r2, \[r0\]
** bx lr
*/
void
r_m_128 (double *r0)
{
  register double r2 asm ("r2");
  r2 = r0[128];
  asm volatile ("" :: "r" (r2));
}

/* ??? This could be done in one instruction, but without mve.fp,
   it makes more sense for memory_operand to enforce the GPR range.  */
/*
** w_m_m32:
** sub (r[0-9]+), r0, #256
** vldr.64 d0, \[\1\]
** bx lr
*/
void
w_m_m32 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[-32];
  asm volatile ("" :: "w" (d0));
}

/*
** w_m_m31:
** vldr.64 d0, \[r0, #-248\]
** bx lr
*/
void
w_m_m31 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[-31];
  asm volatile ("" :: "w" (d0));
}

/*
** w_m_m1:
** vldr.64 d0, \[r0, #-8\]
** bx lr
*/
void
w_m_m1 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[-1];
  asm volatile ("" :: "w" (d0));
}

/*
** w_m_0:
** vldr.64 d0, \[r0\]
** bx lr
*/
void
w_m_0 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[0];
  asm volatile ("" :: "w" (d0));
}

/*
** w_m_1:
** vldr.64 d0, \[r0, #8\]
** bx lr
*/
void
w_m_1 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[1];
  asm volatile ("" :: "w" (d0));
}

/*
** w_m_127:
** vldr.64 d0, \[r0, #1016\]
** bx lr
*/
void
w_m_127 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[127];
  asm volatile ("" :: "w" (d0));
}

/*
** w_m_128:
** add (r[0-9]+), r0, #1024
** vldr.64 d0, \[\1\]
** bx lr
*/
void
w_m_128 (double *r0)
{
  register double d0 asm ("d0");
  d0 = r0[128];
  asm volatile ("" :: "w" (d0));
}

/*
** m_m32_r:
** sub (r[0-9]+), r0, #256
** strd r2, \[\1\]
** bx lr
*/
void
m_m32_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[-32] = r2;
}

/*
** m_m31_r:
** strd r2, \[r0, #-248\]
** bx lr
*/
void
m_m31_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[-31] = r2;
}

/*
** m_m1_r:
** strd r2, \[r0, #-8\]
** bx lr
*/
void
m_m1_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[-1] = r2;
}

/*
** m_0_r:
** strd r2, \[r0\]
** bx lr
*/
void
m_0_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[0] = r2;
}

/*
** m_1_r:
** strd r2, \[r0, #8\]
** bx lr
*/
void
m_1_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[1] = r2;
}

/*
** m_127_r:
** strd r2, \[r0, #1016\]
** bx lr
*/
void
m_127_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[127] = r2;
}

/*
** m_128_r:
** add (r[0-9]+), r0, #1024
** strd r2, \[r0\]
** bx lr
*/
void
m_128_r (double *r0)
{
  register double r2 asm ("r2");
  asm volatile ("" : "=r" (r2));
  r0[128] = r2;
}

/* ??? This could be done in one instruction, but without mve.fp,
   it makes more sense for memory_operand to enforce the GPR range.  */
/*
** m_m32_w:
** sub (r[0-9]+), r0, #256
** vstr.64 d0, \[\1\]
** bx lr
*/
void
m_m32_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[-32] = d0;
}

/*
** m_m31_w:
** vstr.64 d0, \[r0, #-248\]
** bx lr
*/
void
m_m31_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[-31] = d0;
}

/*
** m_m1_w:
** vstr.64 d0, \[r0, #-8\]
** bx lr
*/
void
m_m1_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[-1] = d0;
}

/*
** m_0_w:
** vstr.64 d0, \[r0\]
** bx lr
*/
void
m_0_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[0] = d0;
}

/*
** m_1_w:
** vstr.64 d0, \[r0, #8\]
** bx lr
*/
void
m_1_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[1] = d0;
}

/*
** m_127_w:
** vstr.64 d0, \[r0, #1016\]
** bx lr
*/
void
m_127_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[127] = d0;
}

/*
** m_128_w:
** add (r[0-9]+), r0, #1024
** vstr.64 d0, \[\1\]
** bx lr
*/
void
m_128_w (double *r0)
{
  register double d0 asm ("d0");
  asm volatile ("" : "=w" (d0));
  r0[128] = d0;
}

gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c

@@ -13,5 +13,6 @@ fn1 (__fp16 *pSrc)
       pDst[i] = high;
 }
 
-/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+(, #-?[0-9]+)?\]\n} } } */
+/* { dg-final { scan-assembler-not {vldr\.16\t[^\n]*\]!} } } */
+/* { dg-final { scan-assembler-not {vstr\.16\t[^\n]*\]!} } } */