[AArch64] Expand DImode constant stores to two SImode stores when profitable
* config/aarch64/aarch64.md (mov<mode>): Call aarch64_split_dimode_const_store on DImode constant stores.
* config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store): New prototype.
* config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New function.
* gcc.target/aarch64/store_repeating_constant_1.c: New test.
* gcc.target/aarch64/store_repeating_constant_2.c: Likewise.
From-SVN: r242551
This commit is contained in:
parent
54e63f0028
commit
141a3ccff1
7 changed files with 103 additions and 0 deletions
|
@ -1,3 +1,12 @@
|
|||
2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.md (mov<mode>): Call
|
||||
aarch64_split_dimode_const_store on DImode constant stores.
|
||||
* config/aarch64/aarch64-protos.h (aarch64_split_dimode_const_store):
|
||||
New prototype.
|
||||
* config/aarch64/aarch64.c (aarch64_split_dimode_const_store): New
|
||||
function.
|
||||
|
||||
2016-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
Richard Biener <rguenther@suse.de>
|
||||
|
||||
|
|
|
@ -337,6 +337,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
|
|||
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
|
||||
bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
|
||||
struct simd_immediate_info *);
|
||||
bool aarch64_split_dimode_const_store (rtx, rtx);
|
||||
bool aarch64_symbolic_address_p (rtx);
|
||||
bool aarch64_uimm12_shift (HOST_WIDE_INT);
|
||||
bool aarch64_use_return_insn_p (void);
|
||||
|
|
|
@ -13211,6 +13211,63 @@ aarch64_expand_movmem (rtx *operands)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Try to split a DImode store of a CONST_INT SRC to MEM DST into two
|
||||
SImode stores of a single register. Only the case where the constant has
|
||||
identical bottom and top halves is handled. This is beneficial when the
|
||||
two stores can be merged into an STP and we avoid synthesising potentially
|
||||
expensive immediates twice. Return true if the split was performed, in
which case the SImode immediate move and both SImode stores have been
emitted. Return false if the two halves differ, if the estimated saving
is too small, or if the SImode view of DST is rejected by
aarch64_mem_pair_operand. */
|
||||
|
||||
bool
|
||||
aarch64_split_dimode_const_store (rtx dst, rtx src)
|
||||
{
|
||||
rtx lo = gen_lowpart (SImode, src);
|
||||
rtx hi = gen_highpart_mode (SImode, DImode, src);
|
||||
|
||||
bool size_p = optimize_function_for_size_p (cfun);
|
||||
|
||||
/* Only constants whose two SImode halves compare equal qualify. */
if (!rtx_equal_p (lo, hi))
|
||||
return false;
|
||||
|
||||
/* NOTE(review): both values are compared below as instruction counts for
synthesising the full constant and one half respectively; the NULL_RTX
destination and the false argument presumably request a cost query only,
without emitting code -- confirm against aarch64_internal_mov_immediate. */
unsigned int orig_cost
|
||||
= aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
|
||||
unsigned int lo_cost
|
||||
= aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
|
||||
|
||||
/* We want to transform:
|
||||
MOV x1, 49370
|
||||
MOVK x1, 0x140, lsl 16
|
||||
MOVK x1, 0xc0da, lsl 32
|
||||
MOVK x1, 0x140, lsl 48
|
||||
STR x1, [x0]
|
||||
into:
|
||||
MOV w1, 49370
|
||||
MOVK w1, 0x140, lsl 16
|
||||
STP w1, w1, [x0]
|
||||
So we want to perform this only when we save two instructions
|
||||
or more. When optimizing for size, however, accept any code size
|
||||
savings we can. */
|
||||
/* Optimizing for size: give up unless orig_cost > lo_cost. */
if (size_p && orig_cost <= lo_cost)
|
||||
return false;
|
||||
|
||||
/* Otherwise: give up unless orig_cost >= lo_cost + 2. */
if (!size_p
|
||||
&& (orig_cost <= lo_cost + 1))
|
||||
return false;
|
||||
|
||||
/* View DST as an SImode MEM at offset 0 and check that it is acceptable
as an SImode memory-pair operand before emitting anything below. */
rtx mem_lo = adjust_address (dst, SImode, 0);
|
||||
if (!aarch64_mem_pair_operand (mem_lo, SImode))
|
||||
return false;
|
||||
|
||||
/* Load the shared half into one new SImode register; the same register
is then stored to both halves of the destination. */
rtx tmp_reg = gen_reg_rtx (SImode);
|
||||
aarch64_expand_mov_immediate (tmp_reg, lo);
|
||||
/* The second store targets mem_lo advanced by GET_MODE_SIZE (SImode). */
rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
|
||||
/* Don't emit an explicit store pair as this may not always be profitable.
|
||||
Let the sched-fusion logic decide whether to merge them. */
|
||||
emit_move_insn (mem_lo, tmp_reg);
|
||||
emit_move_insn (mem_hi, tmp_reg);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
|
||||
|
||||
static unsigned HOST_WIDE_INT
|
||||
|
|
|
@ -1011,6 +1011,11 @@
|
|||
(match_operand:GPI 1 "general_operand" ""))]
|
||||
""
|
||||
"
|
||||
if (MEM_P (operands[0]) && CONST_INT_P (operands[1])
|
||||
&& <MODE>mode == DImode
|
||||
&& aarch64_split_dimode_const_store (operands[0], operands[1]))
|
||||
DONE;
|
||||
|
||||
if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
|
||||
operands[1] = force_reg (<MODE>mode, operands[1]);
|
||||
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
* gcc.target/aarch64/store_repeating_constant_1.c: New test.
|
||||
* gcc.target/aarch64/store_repeating_constant_2.c: Likewise.
|
||||
|
||||
2016-11-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
Richard Biener <rguenther@suse.de>
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
|
||||
/* Store a 64-bit constant whose top and bottom 32-bit halves are both
   0x0140c0da.  The directives below expect exactly one MOVK on a W register
   and exactly one STP of two W registers in the generated assembly.  */
void
|
||||
foo (unsigned long long *a)
|
||||
{
|
||||
a[0] = 0x0140c0da0140c0daULL;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movk\\tw.*" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */
|
|
@ -0,0 +1,15 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Os" } */
|
||||
|
||||
/* Check that for -Os we synthesize only the bottom half and then
|
||||
store it twice with an STP rather than synthesizing it twice in each
|
||||
half of an X-reg. */
|
||||
|
||||
/* Both 32-bit halves of the stored constant are 0x0000c0da.  */
void
|
||||
foo (unsigned long long *a)
|
||||
{
|
||||
a[0] = 0xc0da0000c0daULL;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "mov\\tw.*" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */
|
Loading…
Add table
Reference in a new issue