expr.h (move_by_pieces_ninsns): Declare.

gcc/
	* expr.h (move_by_pieces_ninsns): Declare.
	* expr.c (move_by_pieces_ninsns): Make external.
	* config/mips/mips-protos.h (mips_move_by_pieces_p): Declare.
	(mips_store_by_pieces_p): Likewise.
	* config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p.
	(STORE_BY_PIECES_P): Likewise mips_store_by_pieces_p.
	* config/mips/mips.c (mips_move_by_pieces_p): New function.
	(mips_store_by_pieces_p): Likewise.

gcc/testsuite/
	* gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets.
	Increase copy to 5 bytes.  Look for at least two "mem/s/u"s,
	rather than a specific number.

From-SVN: r182801
Commit 0d8f5d625f (parent 9f929ce61f); 8 changed files with 119 additions and 33 deletions.
gcc/ChangeLog
@@ -1,3 +1,14 @@
+2012-01-02  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	* expr.h (move_by_pieces_ninsns): Declare.
+	* expr.c (move_by_pieces_ninsns): Make external.
+	* config/mips/mips-protos.h (mips_move_by_pieces_p): Declare.
+	(mips_store_by_pieces_p): Likewise.
+	* config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p.
+	(STORE_BY_PIECES_P): Likewise mips_store_by_pieces_p.
+	* config/mips/mips.c (mips_move_by_pieces_p): New function.
+	(mips_store_by_pieces_p): Likewise.
+
 2012-01-02  Jakub Jelinek  <jakub@redhat.com>
 
 	* passes.c (register_one_dump_file): Free full_name.

gcc/config/mips/mips-protos.h
@@ -239,6 +239,8 @@ extern void mips_split_call (rtx, rtx);
 extern bool mips_get_pic_call_symbol (rtx *, int);
 extern void mips_expand_fcc_reload (rtx, rtx, rtx);
 extern void mips_set_return_address (rtx, rtx);
+extern bool mips_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
+extern bool mips_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
 extern bool mips_expand_block_move (rtx, rtx, rtx);
 extern void mips_expand_synci_loop (rtx, rtx);

gcc/config/mips/mips.c
@@ -6537,6 +6537,92 @@ mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch)
   emit_insn (gen_slt_sf (dest, fp2, fp1));
 }
 
+/* Implement MOVE_BY_PIECES_P.  */
+
+bool
+mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  if (HAVE_movmemsi)
+    {
+      /* movmemsi is meant to generate code that is at least as good as
+	 move_by_pieces.  However, movmemsi effectively uses a by-pieces
+	 implementation both for moves smaller than a word and for
+	 word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT
+	 bytes.  We should allow the tree-level optimisers to do such
+	 moves by pieces, as it often exposes other optimization
+	 opportunities.  We might as well continue to use movmemsi at
+	 the rtl level though, as it produces better code when
+	 scheduling is disabled (such as at -O).  */
+      if (currently_expanding_to_rtl)
+	return false;
+      if (align < BITS_PER_WORD)
+	return size < UNITS_PER_WORD;
+      return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
+    }
+  /* The default value.  If this becomes a target hook, we should
+     call the default definition instead.  */
+  return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
+	  < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()));
+}
+
+/* Implement STORE_BY_PIECES_P.  */
+
+bool
+mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  /* Storing by pieces involves moving constants into registers
+     of size MIN (ALIGN, BITS_PER_WORD), then storing them.
+     We need to decide whether it is cheaper to load the address of
+     constant data into a register and use a block move instead.  */
+
+  /* If the data is only byte aligned, then:
+
+     (a1) A block move of less than 4 bytes would involve 3 LBs and
+	  3 SBs.  We might as well use 3 single-instruction LIs and 3 SBs
+	  instead.
+
+     (a2) A block move of 4 bytes from aligned source data can use an
+	  LW/SWL/SWR sequence.  This is often better than the 4 LIs and
+	  4 SBs that we would generate when storing by pieces.  */
+  if (align <= BITS_PER_UNIT)
+    return size < 4;
+
+  /* If the data is 2-byte aligned, then:
+
+     (b1) A block move of less than 4 bytes would use a combination of LBs,
+	  LHs, SBs and SHs.  We get better code by using single-instruction
+	  LIs, SBs and SHs instead.
+
+     (b2) A block move of 4 bytes from aligned source data would again use
+	  an LW/SWL/SWR sequence.  In most cases, loading the address of
+	  the source data would require at least one extra instruction.
+	  It is often more efficient to use 2 single-instruction LIs and
+	  2 SHs instead.
+
+     (b3) A block move of up to 3 additional bytes would be like (b1).
+
+     (b4) A block move of 8 bytes from aligned source data can use two
+	  LW/SWL/SWR sequences or a single LD/SDL/SDR sequence.  Both
+	  sequences are better than the 4 LIs and 4 SHs that we'd generate
+	  when storing by pieces.
+
+     The reasoning for higher alignments is similar:
+
+     (c1) A block move of less than 4 bytes would be the same as (b1).
+
+     (c2) A block move of 4 bytes would use an LW/SW sequence.  Again,
+	  loading the address of the source data would typically require
+	  at least one extra instruction.  It is generally better to use
+	  LUI/ORI/SW instead.
+
+     (c3) A block move of up to 3 additional bytes would be like (b1).
+
+     (c4) A block move of 8 bytes can use two LW/SW sequences or a single
+	  LD/SD sequence, and in these cases we've traditionally preferred
+	  the memory copy over the more bulky constant moves.  */
+  return size < 8;
+}
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
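To make the cutoffs in mips_move_by_pieces_p and mips_store_by_pieces_p concrete, here is a standalone sketch (not part of the patch) that mirrors the two decisions with 32-bit MIPS parameters plugged in.  BITS_PER_WORD = 32, UNITS_PER_WORD = 4 and BITS_PER_UNIT = 8 are the usual MIPS32 values; the 16-byte stand-in for MIPS_MAX_MOVE_BYTES_STRAIGHT, the ex_* names and the omission of the generic MOVE_RATIO fallback are assumptions made purely for this example.

#include <stdbool.h>
#include <stdio.h>

/* Assumed 32-bit MIPS parameters; illustration only.  */
#define EX_BITS_PER_UNIT 8
#define EX_BITS_PER_WORD 32
#define EX_UNITS_PER_WORD 4
#define EX_MAX_MOVE_BYTES_STRAIGHT 16	/* assumed stand-in value */

/* Mirror of the HAVE_movmemsi branch of mips_move_by_pieces_p.  */
static bool
ex_move_by_pieces_p (unsigned long size, unsigned int align, bool to_rtl)
{
  if (to_rtl)
    return false;	/* leave RTL expansion to movmemsi */
  if (align < EX_BITS_PER_WORD)
    return size < EX_UNITS_PER_WORD;
  return size <= EX_MAX_MOVE_BYTES_STRAIGHT;
}

/* Mirror of mips_store_by_pieces_p.  */
static bool
ex_store_by_pieces_p (unsigned long size, unsigned int align)
{
  if (align <= EX_BITS_PER_UNIT)
    return size < 4;	/* cases (a1)/(a2) */
  return size < 8;	/* cases (b1)-(c4) */
}

int
main (void)
{
  /* Gimple-level move decisions.  */
  printf ("%d\n", ex_move_by_pieces_p (3, 8, false));	/* small, byte aligned */
  printf ("%d\n", ex_move_by_pieces_p (8, 8, false));	/* too big when unaligned */
  printf ("%d\n", ex_move_by_pieces_p (8, 32, false));	/* word aligned and short */
  printf ("%d\n", ex_move_by_pieces_p (24, 32, false));	/* beyond the straight-copy cutoff */

  /* Store decisions.  */
  printf ("%d\n", ex_store_by_pieces_p (3, 8));
  printf ("%d\n", ex_store_by_pieces_p (4, 32));
  printf ("%d\n", ex_store_by_pieces_p (8, 32));
  return 0;
}

Compiled natively and run, this prints 1, 0, 1, 0 for the move cases and 1, 1, 0 for the store cases, matching the (a1)-(c4) reasoning above.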

gcc/config/mips/mips.h
@@ -2782,23 +2782,8 @@ while (0)
    ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
    : MIPS_CALL_RATIO / 2)
 
-/* movmemsi is meant to generate code that is at least as good as
-   move_by_pieces.  However, movmemsi effectively uses a by-pieces
-   implementation both for moves smaller than a word and for word-aligned
-   moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes.  We should
-   allow the tree-level optimisers to do such moves by pieces, as it
-   often exposes other optimization opportunities.  We might as well
-   continue to use movmemsi at the rtl level though, as it produces
-   better code when scheduling is disabled (such as at -O).  */
-
-#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  (HAVE_movmemsi \
-   ? (!currently_expanding_to_rtl \
-      && ((ALIGN) < BITS_PER_WORD \
-	  ? (SIZE) < UNITS_PER_WORD \
-	  : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
-   : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-      < (unsigned int) MOVE_RATIO (false)))
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+  mips_move_by_pieces_p (SIZE, ALIGN)
 
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
    of the length of a memset call, but use the default otherwise.  */
@@ -2813,16 +2798,8 @@ while (0)
 #define SET_RATIO(speed) \
   ((speed) ? 15 : MIPS_CALL_RATIO - 2)
 
-/* STORE_BY_PIECES_P can be used when copying a constant string, but
-   in that case each word takes 3 insns (lui, ori, sw), or more in
-   64-bit mode, instead of 2 (lw, sw).  For now we always fail this
-   and let the move_by_pieces code copy the string from read-only
-   memory.  In the future, this could be tuned further for multi-issue
-   CPUs that can issue stores down one pipe and arithmetic instructions
-   down another; in that case, the lui/ori/sw combination would be a
-   win for long enough strings.  */
-
-#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+  mips_store_by_pieces_p (SIZE, ALIGN)
 
 #ifndef __mips16
 /* Since the bits of the _init and _fini function is spread across
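The comment removed above records the lui/ori/sw versus lw/sw trade-off that mips_store_by_pieces_p now weighs case by case.  As a rough companion (not part of the patch; the function names, pointer types and the idea of inspecting the -O2 output of a MIPS cross-compiler are assumptions for illustration), these two functions sit on either side of the new cutoff:

/* A 4-byte constant copy into word-aligned memory: below the "size < 8"
   cutoff, so the new predicate allows it to be expanded as immediate
   stores (an LUI/ORI/SW-style sequence) instead of a copy from .rodata.  */
void
store4 (int *p)
{
  __builtin_memcpy (p, "abc", 4);	/* "abc" plus its terminating NUL.  */
}

/* An 8-byte constant copy: at the cutoff, so the traditional block copy
   from read-only memory is kept, as in case (c4) of the new comment.  */
void
store8 (long long *p)
{
  __builtin_memcpy (p, "abcdefg", 8);
}

With the old definition, STORE_BY_PIECES_P was hard-wired to 0, so both functions would have copied their strings from memory.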

gcc/expr.c
@@ -123,9 +123,6 @@ struct store_by_pieces_d
   int reverse;
 };
 
-static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
-						      unsigned int,
-						      unsigned int);
 static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
 			      struct move_by_pieces_d *);
 static bool block_move_libcall_safe_for_call_parm (void);
@@ -1016,7 +1013,7 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
 /* Return number of insns required to move L bytes by pieces.
    ALIGN (in bits) is maximum alignment we can assume.  */
 
-static unsigned HOST_WIDE_INT
+unsigned HOST_WIDE_INT
 move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
 		       unsigned int max_size)
 {

gcc/expr.h
@@ -367,6 +367,10 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
    succeed.  */
 extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
 
+extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
+						     unsigned int,
+						     unsigned int);
+
 /* Return nonzero if it is desirable to store LEN bytes generated by
    CONSTFUN with several move instructions by store_by_pieces
    function.  CONSTFUNDATA is a pointer which will be passed as argument

gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2012-01-02  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	* gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets.
+	Increase copy to 5 bytes.  Look for at least two "mem/s/u"s,
+	rather than a specific number.
+
 2012-01-02  Paul Thomas  <pault@gcc.gnu.org>
 
 	PR fortran/46262

gcc/testsuite/gcc.dg/memcpy-4.c
@@ -1,11 +1,14 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand" } */
 
+#ifdef __mips
+__attribute__((nomips16))
+#endif
 void
 f1 (char *p)
 {
-  __builtin_memcpy (p, "123", 3);
+  __builtin_memcpy (p, "12345", 5);
 }
 
-/* { dg-final { scan-rtl-dump-times "mem/s/u" 3 "expand" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump "mem/s/u.*mem/s/u" "expand" { target mips*-*-* } } } */
 /* { dg-final { cleanup-rtl-dump "expand" } } */