Turn SLOW_UNALIGNED_ACCESS into a target hook
2017-09-12  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>

gcc/
        * defaults.h (SLOW_UNALIGNED_ACCESS): Delete.
        * target.def (slow_unaligned_access): New hook.
        * targhooks.h (default_slow_unaligned_access): Declare.
        * targhooks.c (default_slow_unaligned_access): New function.
        * doc/tm.texi.in (SLOW_UNALIGNED_ACCESS): Replace with...
        (TARGET_SLOW_UNALIGNED_ACCESS): ...this.
        * doc/tm.texi: Regenerate.
        * config/alpha/alpha.h (SLOW_UNALIGNED_ACCESS): Delete.
        * config/arm/arm.h (SLOW_UNALIGNED_ACCESS): Delete.
        * config/i386/i386.h (SLOW_UNALIGNED_ACCESS): Delete commented-out
        definition.
        * config/powerpcspe/powerpcspe.h (SLOW_UNALIGNED_ACCESS): Delete.
        * config/powerpcspe/powerpcspe.c (TARGET_SLOW_UNALIGNED_ACCESS):
        Redefine.
        (rs6000_slow_unaligned_access): New function.
        (rs6000_emit_move): Use it instead of SLOW_UNALIGNED_ACCESS.
        (expand_block_compare): Likewise.
        (expand_strn_compare): Likewise.
        (rs6000_rtx_costs): Likewise.
        * config/riscv/riscv.h (SLOW_UNALIGNED_ACCESS): Delete.
        (riscv_slow_unaligned_access): Likewise.
        * config/riscv/riscv.c (riscv_slow_unaligned_access): Rename to...
        (riscv_slow_unaligned_access_p): ...this and make static.
        (riscv_option_override): Update accordingly.
        (riscv_slow_unaligned_access): New function.
        (TARGET_SLOW_UNALIGNED_ACCESS): Redefine.
        * config/rs6000/rs6000.h (SLOW_UNALIGNED_ACCESS): Delete.
        * config/rs6000/rs6000.c (TARGET_SLOW_UNALIGNED_ACCESS): Redefine.
        (rs6000_slow_unaligned_access): New function.
        (rs6000_emit_move): Use it instead of SLOW_UNALIGNED_ACCESS.
        (rs6000_rtx_costs): Likewise.
        * config/rs6000/rs6000-string.c (expand_block_compare)
        (expand_strn_compare): Use targetm.slow_unaligned_access instead
        of SLOW_UNALIGNED_ACCESS.
        * config/tilegx/tilegx.h (SLOW_UNALIGNED_ACCESS): Delete.
        * config/tilepro/tilepro.h (SLOW_UNALIGNED_ACCESS): Delete.
        * calls.c (expand_call): Use targetm.slow_unaligned_access instead
        of SLOW_UNALIGNED_ACCESS.
        * expmed.c (simple_mem_bitfield_p): Likewise.
        * expr.c (alignment_for_piecewise_move): Likewise.
        (emit_group_load_1): Likewise.
        (emit_group_store): Likewise.
        (copy_blkmode_from_reg): Likewise.
        (emit_push_insn): Likewise.
        (expand_assignment): Likewise.
        (store_field): Likewise.
        (expand_expr_real_1): Likewise.
        * gimple-fold.c (gimple_fold_builtin_memory_op): Likewise.
        * lra-constraints.c (simplify_operand_subreg): Likewise.
        * stor-layout.c (bit_field_mode_iterator::next_mode): Likewise.
        * gimple-ssa-store-merging.c: Likewise in block comment at start
        of file.
        * tree-ssa-strlen.c: Include target.h.
        (handle_builtin_memcmp): Use targetm.slow_unaligned_access instead
        of SLOW_UNALIGNED_ACCESS.
        * system.h (SLOW_UNALIGNED_ACCESS): Poison.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r252009
parent 41defab318
commit e0bd6c9f0a

28 changed files with 204 additions and 142 deletions
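Illustrative note (not part of the commit): after this change a port no longer defines the SLOW_UNALIGNED_ACCESS macro in its *.h file; instead it registers an implementation of the new hook in its *.c file, following the same pattern the riscv and rs6000 hunks below use. A minimal sketch for a hypothetical port "foo" (the foo_* names are placeholders):

/* Sketch only -- "foo" is a placeholder port, not part of this patch.
   A port that used to say
     #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
   in foo.h now supplies a hook function in foo.c...  */

static bool
foo_slow_unaligned_access (machine_mode, unsigned int)
{
  /* Unaligned accesses trap and are emulated, so always report them
     as slow (the old macro expanded to the constant 1).  */
  return true;
}

/* ...and registers it in the target vector.  */
#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS foo_slow_unaligned_access

Ports that do not override the hook get default_slow_unaligned_access, which returns STRICT_ALIGNMENT, matching the old defaults.h fallback removed by this patch.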
gcc/calls.c
@@ -3135,8 +3135,8 @@ expand_call (tree exp, rtx target, int ignore)
           && target
           && MEM_P (target)
           && !(MEM_ALIGN (target) < TYPE_ALIGN (rettype)
-               && SLOW_UNALIGNED_ACCESS (TYPE_MODE (rettype),
-                                         MEM_ALIGN (target))))
+               && targetm.slow_unaligned_access (TYPE_MODE (rettype),
+                                                 MEM_ALIGN (target))))
       structure_value_addr = XEXP (target, 0);
     else
       {
gcc/config/alpha/alpha.h
@@ -300,12 +300,6 @@ extern enum alpha_fp_trap_mode alpha_fptm;

 #define STRICT_ALIGNMENT 1

-/* Set this nonzero if unaligned move instructions are extremely slow.
-
-   On the Alpha, they trap.  */
-
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
-
 /* Standard register usage.  */

 /* Number of actual hardware registers.
gcc/config/arm/arm.h
@@ -1917,8 +1917,6 @@ enum arm_auto_incmodes
 /* Nonzero if access to memory by bytes is slow and undesirable.  */
 #define SLOW_BYTE_ACCESS 0

-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
-
 /* Immediate shift counts are truncated by the output routines (or was it
    the assembler?).  Shift counts in a register are truncated by ARM.  Note
    that the native compiler puts too large (> 32) immediate shift counts
gcc/config/i386/i386.h
@@ -2017,20 +2017,6 @@ do { \
 /* Nonzero if access to memory by shorts is slow and undesirable.  */
 #define SLOW_SHORT_ACCESS 0

-/* Define this macro to be the value 1 if unaligned accesses have a
-   cost many times greater than aligned accesses, for example if they
-   are emulated in a trap handler.
-
-   When this macro is nonzero, the compiler will act as if
-   `STRICT_ALIGNMENT' were nonzero when generating code for block
-   moves.  This can cause significantly more instructions to be
-   produced.  Therefore, do not set this macro nonzero if unaligned
-   accesses only add a cycle or two to the time for a memory access.
-
-   If the value of this macro is always zero, it need not be defined.  */
-
-/* #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 0 */
-
 /* Define this macro if it is as good or better to call a constant
    function address than to call an address kept in a register.

gcc/config/powerpcspe/powerpcspe.c
@@ -1986,6 +1986,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   rs6000_hard_regno_call_part_clobbered

+#undef TARGET_SLOW_UNALIGNED_ACCESS
+#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
+

 /* Processor table.  */

@@ -8366,6 +8369,21 @@ rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
   return align;
 }

+/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
+   instructions simply ignore the low bits; SPE vector memory
+   instructions trap on unaligned accesses; VSX memory instructions are
+   aligned to 4 or 8 bytes.  */
+
+static bool
+rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
+{
+  return (STRICT_ALIGNMENT
+          || (!TARGET_EFFICIENT_UNALIGNED_VSX
+              && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
+                  || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
+                      && (int) align < VECTOR_ALIGN (mode)))));
+}
+
 /* Previous GCC releases forced all vector types to have 16-byte alignment.  */

 bool

@@ -11015,13 +11033,14 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
       if (GET_CODE (operands[0]) == MEM
           && GET_CODE (operands[1]) == MEM
           && mode == DImode
-          && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
-              || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
-          && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
-                                                ? 32 : MEM_ALIGN (operands[0])))
-                || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
-                                                   ? 32
-                                                   : MEM_ALIGN (operands[1]))))
+          && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
+              || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
+          && ! (rs6000_slow_unaligned_access (SImode,
+                                              (MEM_ALIGN (operands[0]) > 32
+                                               ? 32 : MEM_ALIGN (operands[0])))
+                || rs6000_slow_unaligned_access (SImode,
+                                                 (MEM_ALIGN (operands[1]) > 32
+                                                  ? 32 : MEM_ALIGN (operands[1]))))
           && ! MEM_VOLATILE_P (operands [0])
           && ! MEM_VOLATILE_P (operands [1]))
         {

@@ -19989,9 +20008,9 @@ expand_block_compare (rtx operands[])

   unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;

-  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
-  if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
-      || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
+  /* rs6000_slow_unaligned_access -- don't do unaligned stuff.  */
+  if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
+      || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
     return false;

   gcc_assert (GET_MODE (target) == SImode);

@@ -20380,9 +20399,9 @@ expand_strn_compare (rtx operands[], int no_length)
   int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
   int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;

-  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
-  if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
-      || SLOW_UNALIGNED_ACCESS (word_mode, align2))
+  /* rs6000_slow_unaligned_access -- don't do unaligned stuff.  */
+  if (rs6000_slow_unaligned_access (word_mode, align1)
+      || rs6000_slow_unaligned_access (word_mode, align2))
     return false;

   gcc_assert (GET_MODE (target) == SImode);

@@ -37439,7 +37458,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
          than generating address, e.g., (plus (reg) (const)).
          L1 cache latency is about two instructions.  */
       *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
-      if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
+      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
         *total += COSTS_N_INSNS (100);
       return true;

gcc/config/powerpcspe/powerpcspe.h
@@ -998,20 +998,6 @@ enum data_align { align_abi, align_opt, align_both };
 /* Nonzero if move instructions will actually fail to work
    when given unaligned data.  */
 #define STRICT_ALIGNMENT 0

-/* Define this macro to be the value 1 if unaligned accesses have a cost
-   many times greater than aligned accesses, for example if they are
-   emulated in a trap handler.  */
-/* Altivec vector memory instructions simply ignore the low bits; SPE vector
-   memory instructions trap on unaligned accesses; VSX memory instructions are
-   aligned to 4 or 8 bytes.  */
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) \
-  (STRICT_ALIGNMENT \
-   || (!TARGET_EFFICIENT_UNALIGNED_VSX \
-       && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) && (ALIGN) < 32) \
-           || ((VECTOR_MODE_P (MODE) || FLOAT128_VECTOR_P (MODE)) \
-               && (int) (ALIGN) < VECTOR_ALIGN (MODE)))))
-

 /* Standard register usage.  */

gcc/config/riscv/riscv.c
@@ -217,7 +217,7 @@ struct riscv_cpu_info {
 /* Global variables for machine-dependent things.  */

 /* Whether unaligned accesses execute very slowly.  */
-bool riscv_slow_unaligned_access;
+static bool riscv_slow_unaligned_access_p;

 /* Which tuning parameters to use.  */
 static const struct riscv_tune_info *tune_info;

@@ -3744,8 +3744,8 @@ riscv_option_override (void)
   /* Use -mtune's setting for slow_unaligned_access, even when optimizing
      for size.  For architectures that trap and emulate unaligned accesses,
      the performance cost is too great, even for -Os.  */
-  riscv_slow_unaligned_access = (cpu->tune_info->slow_unaligned_access
-                                 || TARGET_STRICT_ALIGN);
+  riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access
+                                   || TARGET_STRICT_ALIGN);

   /* If the user hasn't specified a branch cost, use the processor's
      default.  */

@@ -3966,6 +3966,14 @@ riscv_cannot_copy_insn_p (rtx_insn *insn)
   return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
 }

+/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  */
+
+static bool
+riscv_slow_unaligned_access (machine_mode, unsigned int)
+{
+  return riscv_slow_unaligned_access_p;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

@@ -4102,6 +4110,9 @@ riscv_cannot_copy_insn_p (rtx_insn *insn)
 #undef TARGET_MODES_TIEABLE_P
 #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p

+#undef TARGET_SLOW_UNALIGNED_ACCESS
+#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+
 struct gcc_target targetm = TARGET_INITIALIZER;

 #include "gt-riscv.h"
gcc/config/riscv/riscv.h
@@ -130,8 +130,6 @@ along with GCC; see the file COPYING3.  If not see
    of the privileged architecture.  */
 #define STRICT_ALIGNMENT TARGET_STRICT_ALIGN

-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) riscv_slow_unaligned_access
-
 /* Define this if you wish to imitate the way many other C compilers
    handle alignment of bitfields and the structures that contain
    them.

@@ -854,7 +852,6 @@ while (0)

 #ifndef USED_FOR_TARGET
 extern const enum reg_class riscv_regno_to_class[];
-extern bool riscv_slow_unaligned_access;
 #endif

 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
gcc/config/rs6000/rs6000-string.c
@@ -32,6 +32,7 @@
 #include "explow.h"
 #include "expr.h"
 #include "output.h"
+#include "target.h"

 /* Expand a block clear operation, and return 1 if successful.  Return 0
    if we should let the compiler generate normal code.

@@ -338,9 +339,9 @@ expand_block_compare (rtx operands[])

   unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;

-  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
-  if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
-      || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
+  /* targetm.slow_unaligned_access -- don't do unaligned stuff.  */
+  if (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
+      || targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
     return false;

   gcc_assert (GET_MODE (target) == SImode);

@@ -729,9 +730,9 @@ expand_strn_compare (rtx operands[], int no_length)
   int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
   int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;

-  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
-  if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
-      || SLOW_UNALIGNED_ACCESS (word_mode, align2))
+  /* targetm.slow_unaligned_access -- don't do unaligned stuff.  */
+  if (targetm.slow_unaligned_access (word_mode, align1)
+      || targetm.slow_unaligned_access (word_mode, align2))
     return false;

   gcc_assert (GET_MODE (target) == SImode);
gcc/config/rs6000/rs6000.c
@@ -1976,6 +1976,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   rs6000_hard_regno_call_part_clobbered

+#undef TARGET_SLOW_UNALIGNED_ACCESS
+#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
+

 /* Processor table.  */

@@ -7902,6 +7905,20 @@ rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
   return align;
 }

+/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
+   instructions simply ignore the low bits; VSX memory instructions
+   are aligned to 4 or 8 bytes.  */
+
+static bool
+rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
+{
+  return (STRICT_ALIGNMENT
+          || (!TARGET_EFFICIENT_UNALIGNED_VSX
+              && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
+                  || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
+                      && (int) align < VECTOR_ALIGN (mode)))));
+}
+
 /* Previous GCC releases forced all vector types to have 16-byte alignment.  */

 bool

@@ -10500,13 +10517,14 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
       if (GET_CODE (operands[0]) == MEM
          && GET_CODE (operands[1]) == MEM
          && mode == DImode
-         && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
-             || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
-         && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
-                                               ? 32 : MEM_ALIGN (operands[0])))
-               || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
-                                                  ? 32
-                                                  : MEM_ALIGN (operands[1]))))
+         && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
+             || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
+         && ! (rs6000_slow_unaligned_access (SImode,
+                                             (MEM_ALIGN (operands[0]) > 32
+                                              ? 32 : MEM_ALIGN (operands[0])))
+               || rs6000_slow_unaligned_access (SImode,
+                                                (MEM_ALIGN (operands[1]) > 32
+                                                 ? 32 : MEM_ALIGN (operands[1]))))
          && ! MEM_VOLATILE_P (operands [0])
          && ! MEM_VOLATILE_P (operands [1]))
        {

@@ -34252,7 +34270,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
          than generating address, e.g., (plus (reg) (const)).
          L1 cache latency is about two instructions.  */
       *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
-      if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
+      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
         *total += COSTS_N_INSNS (100);
       return true;

gcc/config/rs6000/rs6000.h
@@ -968,19 +968,6 @@ enum data_align { align_abi, align_opt, align_both };
 /* Nonzero if move instructions will actually fail to work
    when given unaligned data.  */
 #define STRICT_ALIGNMENT 0

-/* Define this macro to be the value 1 if unaligned accesses have a cost
-   many times greater than aligned accesses, for example if they are
-   emulated in a trap handler.  */
-/* Altivec vector memory instructions simply ignore the low bits; VSX memory
-   instructions are aligned to 4 or 8 bytes.  */
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) \
-  (STRICT_ALIGNMENT \
-   || (!TARGET_EFFICIENT_UNALIGNED_VSX \
-       && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) && (ALIGN) < 32) \
-           || ((VECTOR_MODE_P (MODE) || FLOAT128_VECTOR_P (MODE)) \
-               && (int) (ALIGN) < VECTOR_ALIGN (MODE)))))
-

 /* Standard register usage.  */

gcc/config/tilegx/tilegx.h
@@ -94,9 +94,6 @@
 #define BIGGEST_FIELD_ALIGNMENT 128
 #define WIDEST_HARDWARE_FP_SIZE 64

-/* Unaligned moves trap and are very slow.  */
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
-
 /* Make strings word-aligned so strcpy from constants will be
    faster.  */
 #define CONSTANT_ALIGNMENT(EXP, ALIGN) \
gcc/config/tilepro/tilepro.h
@@ -58,9 +58,6 @@
 #define FASTEST_ALIGNMENT 32
 #define BIGGEST_FIELD_ALIGNMENT 64

-/* Unaligned moves trap and are very slow.  */
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
-
 /* Make strings word-aligned so strcpy from constants will be
    faster.  */
 #define CONSTANT_ALIGNMENT(EXP, ALIGN) \
gcc/defaults.h
@@ -1170,10 +1170,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define ATTRIBUTE_ALIGNED_VALUE BIGGEST_ALIGNMENT
 #endif

-#ifndef SLOW_UNALIGNED_ACCESS
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
-#endif
-
 /* For most ports anything that evaluates to a constant symbolic
    or integer value is acceptable as a constant address.  */
 #ifndef CONSTANT_ADDRESS_P
gcc/doc/tm.texi
@@ -6386,23 +6386,22 @@ may eliminate subsequent memory access if subsequent accesses occur to
 other fields in the same word of the structure, but to different bytes.
 @end defmac

-@defmac SLOW_UNALIGNED_ACCESS (@var{mode}, @var{alignment})
-Define this macro to be the value 1 if memory accesses described by the
+@deftypefn {Target Hook} bool TARGET_SLOW_UNALIGNED_ACCESS (machine_mode @var{mode}, unsigned int @var{align})
+This hook returns true if memory accesses described by the
 @var{mode} and @var{alignment} parameters have a cost many times greater
-than aligned accesses, for example if they are emulated in a trap
-handler.  This macro is invoked only for unaligned accesses, i.e. when
+than aligned accesses, for example if they are emulated in a trap handler.
+This hook is invoked only for unaligned accesses, i.e. when
 @code{@var{alignment} < GET_MODE_ALIGNMENT (@var{mode})}.

-When this macro is nonzero, the compiler will act as if
-@code{STRICT_ALIGNMENT} were nonzero when generating code for block
+When this hook returns true, the compiler will act as if
+@code{STRICT_ALIGNMENT} were true when generating code for block
 moves.  This can cause significantly more instructions to be produced.
-Therefore, do not set this macro nonzero if unaligned accesses only add a
-cycle or two to the time for a memory access.
+Therefore, do not make this hook return true if unaligned accesses only
+add a cycle or two to the time for a memory access.

-If the value of this macro is always zero, it need not be defined.  If
-this macro is defined, it should produce a nonzero value when
-@code{STRICT_ALIGNMENT} is nonzero.
-@end defmac
+The hook must return true whenever @code{STRICT_ALIGNMENT} is true.
+The default implementation returns @code{STRICT_ALIGNMENT}.
+@end deftypefn

 @defmac MOVE_RATIO (@var{speed})
 The threshold of number of scalar memory-to-memory move insns, @emph{below}
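For illustration only (not part of the patch): target-independent code now asks the target vector directly instead of expanding the old macro, and per the documentation above the hook is only meaningful when the access really is unaligned. The helper name below is made up for the example.

/* Sketch: slow_unaligned_store_p is a hypothetical helper, not a GCC
   function.  */
static bool
slow_unaligned_store_p (machine_mode mode, unsigned int align)
{
  /* Only consult the hook when ALIGN is below the mode's natural
     alignment, i.e. the access is genuinely unaligned.  */
  return (align < GET_MODE_ALIGNMENT (mode)
          && targetm.slow_unaligned_access (mode, align));
}

This is the same shape as the converted call sites in expmed.c and expr.c later in this diff.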
gcc/doc/tm.texi.in
@@ -4559,23 +4559,7 @@ may eliminate subsequent memory access if subsequent accesses occur to
 other fields in the same word of the structure, but to different bytes.
 @end defmac

-@defmac SLOW_UNALIGNED_ACCESS (@var{mode}, @var{alignment})
-Define this macro to be the value 1 if memory accesses described by the
-@var{mode} and @var{alignment} parameters have a cost many times greater
-than aligned accesses, for example if they are emulated in a trap
-handler.  This macro is invoked only for unaligned accesses, i.e. when
-@code{@var{alignment} < GET_MODE_ALIGNMENT (@var{mode})}.
-
-When this macro is nonzero, the compiler will act as if
-@code{STRICT_ALIGNMENT} were nonzero when generating code for block
-moves.  This can cause significantly more instructions to be produced.
-Therefore, do not set this macro nonzero if unaligned accesses only add a
-cycle or two to the time for a memory access.
-
-If the value of this macro is always zero, it need not be defined.  If
-this macro is defined, it should produce a nonzero value when
-@code{STRICT_ALIGNMENT} is nonzero.
-@end defmac
+@hook TARGET_SLOW_UNALIGNED_ACCESS

 @defmac MOVE_RATIO (@var{speed})
 The threshold of number of scalar memory-to-memory move insns, @emph{below}
gcc/expmed.c
@@ -569,7 +569,7 @@ simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
   return (MEM_P (op0)
          && bitnum % BITS_PER_UNIT == 0
          && bitsize == GET_MODE_BITSIZE (mode)
-         && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
+         && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
              || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 }
gcc/expr.c
@@ -730,7 +730,7 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
     {
       tmode = mode_iter.require ();
       if (GET_MODE_SIZE (tmode) > max_pieces
-          || SLOW_UNALIGNED_ACCESS (tmode, align))
+          || targetm.slow_unaligned_access (tmode, align))
        break;
       xmode = tmode;
     }

@@ -2179,7 +2179,7 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type, int ssize)

       /* Optimize the access just a bit.  */
       if (MEM_P (src)
-         && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (src))
+         && (! targetm.slow_unaligned_access (mode, MEM_ALIGN (src))
              || MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode))
          && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0
          && bytelen == GET_MODE_SIZE (mode))

@@ -2584,7 +2584,7 @@ emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED, int ssize)

       /* Optimize the access just a bit.  */
       else if (MEM_P (dest)
-              && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (dest))
+              && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (dest))
                   || MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode))
               && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0
               && bytelen == GET_MODE_SIZE (mode))

@@ -2653,7 +2653,7 @@ copy_blkmode_from_reg (rtx target, rtx srcreg, tree type)

  /* We can use a single move if we have an exact mode for the size.  */
  else if (MEM_P (target)
-          && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (target))
+          && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (target))
               || MEM_ALIGN (target) >= GET_MODE_ALIGNMENT (mode))
           && bytes == GET_MODE_SIZE (mode))
    {

@@ -4348,7 +4348,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
          /* Here we avoid the case of a structure whose weak alignment
             forces many pushes of a small amount of data,
             and such small pushes do rounding that causes trouble.  */
-         && ((! SLOW_UNALIGNED_ACCESS (word_mode, align))
+         && ((!targetm.slow_unaligned_access (word_mode, align))
              || align >= BIGGEST_ALIGNMENT
              || (PUSH_ROUNDING (align / BITS_PER_UNIT)
                  == (align / BITS_PER_UNIT)))

@@ -4947,7 +4947,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
               < GET_MODE_ALIGNMENT (mode))
          && (((icode = optab_handler (movmisalign_optab, mode))
               != CODE_FOR_nothing)
-             || SLOW_UNALIGNED_ACCESS (mode, align)))
+             || targetm.slow_unaligned_access (mode, align)))
        {
          rtx reg, mem;

@@ -6783,7 +6783,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
          || (mode != BLKmode
              && ((((MEM_ALIGN (target) < GET_MODE_ALIGNMENT (mode))
                    || bitpos % GET_MODE_ALIGNMENT (mode))
-                  && SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (target)))
+                  && targetm.slow_unaligned_access (mode, MEM_ALIGN (target)))
                  || (bitpos % BITS_PER_UNIT != 0)))
          || (bitsize >= 0 && mode != BLKmode
              && GET_MODE_BITSIZE (mode) > bitsize)

@@ -10229,7 +10229,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
            expand_insn (icode, 2, ops);
            temp = ops[0].value;
          }
-       else if (SLOW_UNALIGNED_ACCESS (mode, align))
+       else if (targetm.slow_unaligned_access (mode, align))
          temp = extract_bit_field (temp, GET_MODE_BITSIZE (mode),
                                    0, TYPE_UNSIGNED (TREE_TYPE (exp)),
                                    (modifier == EXPAND_STACK_PARM

@@ -10663,7 +10663,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
                     && ((modifier == EXPAND_CONST_ADDRESS
                          || modifier == EXPAND_INITIALIZER)
                         ? STRICT_ALIGNMENT
-                        : SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))))
+                        : targetm.slow_unaligned_access (mode1,
+                                                         MEM_ALIGN (op0))))
                    || (bitpos % BITS_PER_UNIT != 0)))
           /* If the type and the field are a constant size and the
              size of the type isn't the same size as the bitfield,
gcc/gimple-fold.c
@@ -756,7 +756,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
              /* If the destination pointer is not aligned we must be able
                 to emit an unaligned store.  */
              && (dest_align >= GET_MODE_ALIGNMENT (mode)
-                 || !SLOW_UNALIGNED_ACCESS (mode, dest_align)
+                 || !targetm.slow_unaligned_access (mode, dest_align)
                  || (optab_handler (movmisalign_optab, mode)
                      != CODE_FOR_nothing)))
            {

@@ -769,7 +769,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
              if (tem)
                srcmem = tem;
              else if (src_align < GET_MODE_ALIGNMENT (mode)
-                      && SLOW_UNALIGNED_ACCESS (mode, src_align)
+                      && targetm.slow_unaligned_access (mode, src_align)
                       && (optab_handler (movmisalign_optab, mode)
                           == CODE_FOR_nothing))
                srcmem = NULL_TREE;
gcc/gimple-ssa-store-merging.c
@@ -63,7 +63,7 @@
    of a size that is a power of 2.  For example it can try to emit a 40-bit
    store as a 32-bit store followed by an 8-bit store.
    We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
-   SLOW_UNALIGNED_ACCESS rules.
+   TARGET_SLOW_UNALIGNED_ACCESS rules.

    Note on endianness and example:
    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
gcc/lra-constraints.c
@@ -1557,9 +1557,10 @@ simplify_operand_subreg (int nop, machine_mode reg_mode)
           && GET_MODE_SIZE (innermode) <= UNITS_PER_WORD
           && WORD_REGISTER_OPERATIONS)
       && (!(MEM_ALIGN (subst) < GET_MODE_ALIGNMENT (mode)
-            && SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (subst)))
+            && targetm.slow_unaligned_access (mode, MEM_ALIGN (subst)))
           || (MEM_ALIGN (reg) < GET_MODE_ALIGNMENT (innermode)
-              && SLOW_UNALIGNED_ACCESS (innermode, MEM_ALIGN (reg)))))
+              && targetm.slow_unaligned_access (innermode,
+                                                MEM_ALIGN (reg)))))
     return true;

   *curr_id->operand_loc[nop] = operand;
gcc/stor-layout.c
@@ -2793,7 +2793,7 @@ bit_field_mode_iterator::next_mode (scalar_int_mode *out_mode)

       /* Stop if the mode requires too much alignment.  */
       if (GET_MODE_ALIGNMENT (mode) > m_align
-          && SLOW_UNALIGNED_ACCESS (mode, m_align))
+          && targetm.slow_unaligned_access (mode, m_align))
        break;

       *out_mode = mode;
gcc/system.h
@@ -912,7 +912,7 @@ extern void fancy_abort (const char *, int, const char *)
        CLEAR_BY_PIECES_P MOVE_BY_PIECES_P SET_BY_PIECES_P \
        STORE_BY_PIECES_P TARGET_FLT_EVAL_METHOD \
        HARD_REGNO_CALL_PART_CLOBBERED HARD_REGNO_MODE_OK \
-       MODES_TIEABLE_P FUNCTION_ARG_PADDING
+       MODES_TIEABLE_P FUNCTION_ARG_PADDING SLOW_UNALIGNED_ACCESS

 /* Target macros only used for code built for the target, that have
    moved to libgcc-tm.h or have never been present elsewhere.  */
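The system.h hunk above adds SLOW_UNALIGNED_ACCESS to the poisoned-identifier list, so any leftover use of the removed macro becomes a hard build error. Roughly, the mechanism looks like this (a sketch only, not the actual system.h text):

/* Sketch of the poisoning mechanism; the real list lives in gcc/system.h.  */
#pragma GCC poison SLOW_UNALIGNED_ACCESS

/* From here on, any reference to the poisoned name is rejected by the
   compiler with an "attempt to use poisoned" error.  */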
gcc/target.def
@@ -3511,6 +3511,25 @@ negative number from this hook.",
 int, (machine_mode mode),
 default_compare_by_pieces_branch_ratio)

+DEFHOOK
+(slow_unaligned_access,
+ "This hook returns true if memory accesses described by the\n\
+@var{mode} and @var{alignment} parameters have a cost many times greater\n\
+than aligned accesses, for example if they are emulated in a trap handler.\n\
+This hook is invoked only for unaligned accesses, i.e. when\n\
+@code{@var{alignment} < GET_MODE_ALIGNMENT (@var{mode})}.\n\
+\n\
+When this hook returns true, the compiler will act as if\n\
+@code{STRICT_ALIGNMENT} were true when generating code for block\n\
+moves.  This can cause significantly more instructions to be produced.\n\
+Therefore, do not make this hook return true if unaligned accesses only\n\
+add a cycle or two to the time for a memory access.\n\
+\n\
+The hook must return true whenever @code{STRICT_ALIGNMENT} is true.\n\
+The default implementation returns @code{STRICT_ALIGNMENT}.",
+ bool, (machine_mode mode, unsigned int align),
+ default_slow_unaligned_access)
+
 DEFHOOK
 (optab_supported_p,
  "Return true if the optimizers should use optab @var{op} with\n\
gcc/targhooks.c
@@ -1558,6 +1558,14 @@ default_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
 #endif
 }

+/* The default implementation of TARGET_SLOW_UNALIGNED_ACCESS.  */
+
+bool
+default_slow_unaligned_access (machine_mode, unsigned int)
+{
+  return STRICT_ALIGNMENT;
+}
+
 /* For hooks which use the MOVE_RATIO macro, this gives the legacy default
    behavior.  SPEED_P is true if we are compiling for speed.  */

gcc/targhooks.h
@@ -197,6 +197,7 @@ extern tree default_builtin_tm_load_store (tree);
 extern int default_memory_move_cost (machine_mode, reg_class_t, bool);
 extern int default_register_move_cost (machine_mode, reg_class_t,
                                        reg_class_t);
+extern bool default_slow_unaligned_access (machine_mode, unsigned int);

 extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
                                                     unsigned int,
gcc/tree-ssa-strlen.c
@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-chkp.h"
 #include "tree-hash-traits.h"
 #include "builtins.h"
+#include "target.h"

 /* A vector indexed by SSA_NAME_VERSION.  0 means unknown, positive value
    is an index into strinfo vector, negative value stands for

@@ -2124,7 +2125,7 @@ handle_builtin_memcmp (gimple_stmt_iterator *gsi)
   unsigned align = MIN (align1, align2);
   scalar_int_mode mode;
   if (int_mode_for_size (leni, 1).exists (&mode)
-      && (align >= leni || !SLOW_UNALIGNED_ACCESS (mode, align)))
+      && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
     {
       location_t loc = gimple_location (stmt2);
       tree type, off;