AArch64: Fix overflow in memcopy expansion on aarch64.
Currently the inline memcpy expansion code for AArch64 is using a signed int to hold the number of elements to copy. When you giver give it a value larger than INT_MAX it will overflow. The overflow causes the maximum number of instructions we want to expand to check to fail since this assumes an unsigned number. This patch changes the maximum isns arithmetic to be unsigned HOST_WIDE_INT. note that the calculation *must* remained signed as the memcopy issues overlapping unaligned copies. This means the pointer must be moved back and so you need signed arithmetic. gcc/ChangeLog: PR target/97535 * config/aarch64/aarch64.c (aarch64_expand_cpymem): Use unsigned arithmetic in check. gcc/testsuite/ChangeLog: PR target/97535 * gcc.target/aarch64/pr97535.c: New test.
This commit is contained in:
parent
8eb8dcac6e
commit
0f801e0b6c
2 changed files with 25 additions and 4 deletions
|
@ -21299,6 +21299,8 @@ aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
|
|||
bool
|
||||
aarch64_expand_cpymem (rtx *operands)
|
||||
{
|
||||
/* These need to be signed as we need to perform arithmetic on n as
|
||||
signed operations. */
|
||||
int n, mode_bits;
|
||||
rtx dst = operands[0];
|
||||
rtx src = operands[1];
|
||||
|
@ -21309,21 +21311,24 @@ aarch64_expand_cpymem (rtx *operands)
|
|||
/* When optimizing for size, give a better estimate of the length of a
|
||||
memcpy call, but use the default otherwise. Moves larger than 8 bytes
|
||||
will always require an even number of instructions to do now. And each
|
||||
operation requires both a load+store, so devide the max number by 2. */
|
||||
int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
|
||||
operation requires both a load+store, so divide the max number by 2. */
|
||||
unsigned int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
|
||||
|
||||
/* We can't do anything smart if the amount to copy is not constant. */
|
||||
if (!CONST_INT_P (operands[2]))
|
||||
return false;
|
||||
|
||||
n = INTVAL (operands[2]);
|
||||
unsigned HOST_WIDE_INT tmp = INTVAL (operands[2]);
|
||||
|
||||
/* Try to keep the number of instructions low. For all cases we will do at
|
||||
most two moves for the residual amount, since we'll always overlap the
|
||||
remainder. */
|
||||
if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
|
||||
if (((tmp / 16) + (tmp % 16 ? 2 : 0)) > max_num_moves)
|
||||
return false;
|
||||
|
||||
/* At this point tmp is known to have to fit inside an int. */
|
||||
n = tmp;
|
||||
|
||||
base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
|
||||
dst = adjust_automodify_address (dst, VOIDmode, base, 0);
|
||||
|
||||
|
|
16
gcc/testsuite/gcc.target/aarch64/pr97535.c
Normal file
16
gcc/testsuite/gcc.target/aarch64/pr97535.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define SIZE 2181038080
|
||||
|
||||
extern char raw_buffer[SIZE];
|
||||
|
||||
void setRaw(const void *raw)
|
||||
{
|
||||
memcpy(raw_buffer, raw, SIZE);
|
||||
}
|
||||
|
||||
/* At any optimization level this should be a function call
|
||||
and not inlined. */
|
||||
/* { dg-final { scan-assembler "bl\tmemcpy" } } */
|
Loading…
Add table
Reference in a new issue