[AArch64] Add support for 64-bit vector-mode ldp/stp
* config/aarch64/aarch64.c (aarch64_mode_valid_for_sched_fusion_p): New function. (fusion_load_store): Use it. * config/aarch64/aarch64-ldpstp.md: Add new peephole2s for ldp and stp in VD modes. * config/aarch64/aarch64-simd.md (load_pair<mode>, VD): New pattern. (store_pair<mode>, VD): Likewise. * gcc.target/aarch64/stp_vec_64_1.c: New test. * gcc.target/aarch64/ldp_vec_64_1.c: Likewise. From-SVN: r229094
This commit is contained in:
parent
361efe0562
commit
abc5231831
7 changed files with 135 additions and 2 deletions
|
@ -1,3 +1,13 @@
|
|||
2015-10-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.c (aarch64_mode_valid_for_sched_fusion_p):
|
||||
New function.
|
||||
(fusion_load_store): Use it.
|
||||
* config/aarch64/aarch64-ldpstp.md: Add new peephole2s for
|
||||
ldp and stp in VD modes.
|
||||
* config/aarch64/aarch64-simd.md (load_pair<mode>, VD): New pattern.
|
||||
(store_pair<mode>, VD): Likewise.
|
||||
|
||||
2015-10-20 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
PR rtl-optimization/67609
|
||||
|
|
|
@ -98,6 +98,47 @@
|
|||
}
|
||||
})
|
||||
|
||||
;; Combine two consecutive 64-bit vector (VD) register loads into one
;; two-set PARALLEL, the shape matched by the load_pair<mode> insn.
;; Whether the two accesses may be paired is decided by
;; aarch64_operands_ok_for_ldpstp.  The pattern is gated on TARGET_SIMD,
;; like the corresponding VD store peephole and load_pair<mode> itself.
(define_peephole2
  [(set (match_operand:VD 0 "register_operand" "")
        (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:VD 2 "register_operand" "")
        (match_operand:VD 3 "memory_operand" ""))]
  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);
  /* load_pair<mode> expects the second memory operand to sit exactly one
     mode-size above the first, so order the pair by ascending offset.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      std::swap (operands[0], operands[2]);
      std::swap (operands[1], operands[3]);
    }
})
|
||||
|
||||
;; Combine two consecutive 64-bit vector (VD) register stores into one
;; two-set PARALLEL, the shape matched by the store_pair<mode> insn.
;; Only attempted when TARGET_SIMD is enabled and
;; aarch64_operands_ok_for_ldpstp accepts the operands.
(define_peephole2
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
        (match_operand:VD 1 "register_operand" ""))
   (set (match_operand:VD 2 "memory_operand" "")
        (match_operand:VD 3 "register_operand" ""))]
  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, first_off, second_off;

  extract_base_offset_in_addr (operands[0], &base, &first_off);
  extract_base_offset_in_addr (operands[2], &base, &second_off);
  /* Keep the lower-offset store first; exchange both the memory
     destinations and their source registers when they arrive reversed.  */
  if (INTVAL (second_off) < INTVAL (first_off))
    {
      std::swap (operands[1], operands[3]);
      std::swap (operands[0], operands[2]);
    }
})
|
||||
|
||||
|
||||
;; Handle sign/zero extended consecutive load/store.
|
||||
|
||||
(define_peephole2
|
||||
|
|
|
@ -153,6 +153,34 @@
|
|||
(set_attr "length" "4,4,4,8,8,8,4")]
|
||||
)
|
||||
|
||||
;; Paired load of two 64-bit vector (VD) registers.  Only matches when the
;; address of operand 3 equals the address of operand 1 plus the size of
;; one <MODE> element, i.e. the two memory operands are adjacent.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
        (match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (plus_constant (Pmode, XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)),
                   XEXP (operands[3], 0))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
|
||||
|
||||
;; Paired store of two 64-bit vector (VD) registers.  Only matches when the
;; address of operand 2 equals the address of operand 0 plus the size of
;; one <MODE> element, i.e. the two memory operands are adjacent.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
        (match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (plus_constant (Pmode, XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<MODE>mode)),
                   XEXP (operands[2], 0))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:VQ 0 "register_operand" "")
|
||||
(match_operand:VQ 1 "register_operand" ""))]
|
||||
|
|
|
@ -3468,6 +3468,18 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
|
|||
&& offset % GET_MODE_SIZE (mode) == 0);
|
||||
}
|
||||
|
||||
/* Return true if MODE is one of the modes for which we
|
||||
support LDP/STP operations. */
|
||||
|
||||
static bool
|
||||
aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
|
||||
{
|
||||
return mode == SImode || mode == DImode
|
||||
|| mode == SFmode || mode == DFmode
|
||||
|| (aarch64_vector_mode_supported_p (mode)
|
||||
&& GET_MODE_SIZE (mode) == 8);
|
||||
}
|
||||
|
||||
/* Return true if X is a valid address for machine mode MODE. If it is,
|
||||
fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
|
||||
effect. OUTER_CODE is PARALLEL for a load/store pair. */
|
||||
|
@ -12813,8 +12825,9 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
|
|||
src = SET_SRC (x);
|
||||
dest = SET_DEST (x);
|
||||
|
||||
if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
|
||||
&& GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
|
||||
machine_mode dest_mode = GET_MODE (dest);
|
||||
|
||||
if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
|
||||
return SCHED_FUSION_NONE;
|
||||
|
||||
if (GET_CODE (src) == SIGN_EXTEND)
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2015-10-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* gcc.target/aarch64/stp_vec_64_1.c: New test.
|
||||
* gcc.target/aarch64/ldp_vec_64_1.c: Likewise.
|
||||
|
||||
2015-10-20 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_vect64): Add AArch64.
|
||||
|
|
16
gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c
Normal file
16
gcc/testsuite/gcc.target/aarch64/ldp_vec_64_1.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
/* { dg-options "-Ofast" } */

typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));

/* Each iteration reads two adjacent 64-bit vectors, bar[i] and bar[i + 1],
   which is the access pattern the dg-final check below expects to be
   emitted as a single LDP of D registers.  */
void
foo (int32x2_t *foo, int32x2_t *bar)
{
  int i;

  for (i = 0; i < 1024; i += 2)
    foo[i] = bar[i] + bar[i + 1];
}

/* { dg-final { scan-assembler "ldp\td\[0-9\]+, d\[0-9\]" } } */
|
20
gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c
Normal file
20
gcc/testsuite/gcc.target/aarch64/stp_vec_64_1.c
Normal file
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
/* { dg-options "-Ofast" } */

typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));

/* Each iteration writes the same 64-bit vector to two adjacent slots,
   foo[idx] and foo[idx + 1], which is the access pattern the dg-final
   check below expects to be emitted as a single STP of D registers.  */
void
bar (int32x2_t *foo)
{
  int32x2_t fill = { 3, 2 };
  int idx;

  for (idx = 0; idx < 256; idx += 2)
    {
      foo[idx] = fill;
      foo[idx + 1] = fill;
    }
}

/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]" } } */
|
Loading…
Add table
Reference in a new issue