From 6e98504002b3ee32c9b3d17da648b01e1424f833 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 14 Jul 2003 05:17:18 +0000 Subject: [PATCH] tm.texi (BLOCK_REG_PADDING): Describe. * doc/tm.texi (BLOCK_REG_PADDING): Describe. * expr.h (struct locate_and_pad_arg_data): Add where_pad. (emit_group_load, emit_group_store): Adjust declarations. Remove most occurrences of #ifdef TREE_CODE. * expr.c (emit_group_load): Add "type" param, and use BLOCK_REG_PADDING to determine need for a shift. Optimize non- aligned accesses if !SLOW_UNALIGNED_ACCESS. (emit_group_store): Likewise. (emit_push_insn, expand_assignment, store_expr, expand_expr): Adjust emit_group_load and emit_group_store calls. * calls.c (store_unaligned_arguments_into_pseudos): Tidy. Use BLOCK_REG_PADDING to determine whether we need endian_correction. (load_register_parameters): Localize vars. Handle shifting of small values to the correct end of regs. Adjust emit_group_load call. (expand_call, emit_library_call_value_1): Adjust emit_group_load and emit_group_store calls. * function.c (assign_parms): Set mem alignment for stack slots. Adjust emit_group_store call. Store values at the "wrong" end of regs to the stack. Use BLOCK_REG_PADDING. (locate_and_pad_parm): Save where_pad. (expand_function_end): Adjust emit_group_load call. * stmt.c (expand_value_return): Adjust emit_group_load call. * Makefile.in (calls.o): Depend on $(OPTABS_H). * config/rs6000/linux64.h (TARGET_LITTLE_ENDIAN): Redefine as 0. (AGGREGATE_PADDING_FIXED, AGGREGATES_PAD_UPWARD_ALWAYS): Define. (MUST_PASS_IN_STACK): Define. (BLOCK_REG_PADDING): Define. * config/rs6000/rs6000.h (struct rs6000_args): Remove orig_nargs. (PAD_VARARGS_DOWN): Define in terms of FUNCTION_ARG_PADDING. * config/rs6000/rs6000.c (init_cumulative_args): Don't set orig_nargs. (function_arg_padding): !AGGREGATE_PADDING_FIXED compatibility code. Act on AGGREGATES_PAD_UPWARD_ALWAYS. From-SVN: r69318 --- gcc/ChangeLog | 36 +++++++++++ gcc/Makefile.in | 2 +- gcc/calls.c | 120 ++++++++++++++++++++++++++---------- gcc/config/rs6000/linux64.h | 33 ++++++++++ gcc/config/rs6000/rs6000.c | 45 ++++++++++++-- gcc/config/rs6000/rs6000.h | 10 +-- gcc/doc/tm.texi | 11 ++++ gcc/expr.c | 72 ++++++++++++++-------- gcc/expr.h | 25 +++----- gcc/function.c | 30 +++++++-- gcc/stmt.c | 2 +- 11 files changed, 291 insertions(+), 95 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f5595fdeb17..510d74fbd9b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,39 @@ +2003-07-14 Alan Modra + + * doc/tm.texi (BLOCK_REG_PADDING): Describe. + * expr.h (struct locate_and_pad_arg_data): Add where_pad. + (emit_group_load, emit_group_store): Adjust declarations. + Remove most occurrences of #ifdef TREE_CODE. + * expr.c (emit_group_load): Add "type" param, and use + BLOCK_REG_PADDING to determine need for a shift. Optimize non- + aligned accesses if !SLOW_UNALIGNED_ACCESS. + (emit_group_store): Likewise. + (emit_push_insn, expand_assignment, store_expr, expand_expr): Adjust + emit_group_load and emit_group_store calls. + * calls.c (store_unaligned_arguments_into_pseudos): Tidy. Use + BLOCK_REG_PADDING to determine whether we need endian_correction. + (load_register_parameters): Localize vars. Handle shifting of + small values to the correct end of regs. Adjust emit_group_load + call. + (expand_call, emit_library_call_value_1): Adjust emit_group_load + and emit_group_store calls. + * function.c (assign_parms): Set mem alignment for stack slots. + Adjust emit_group_store call. 
Store values at the "wrong" end + of regs to the stack. Use BLOCK_REG_PADDING. + (locate_and_pad_parm): Save where_pad. + (expand_function_end): Adjust emit_group_load call. + * stmt.c (expand_value_return): Adjust emit_group_load call. + * Makefile.in (calls.o): Depend on $(OPTABS_H). + * config/rs6000/linux64.h (TARGET_LITTLE_ENDIAN): Redefine as 0. + (AGGREGATE_PADDING_FIXED, AGGREGATES_PAD_UPWARD_ALWAYS): Define. + (MUST_PASS_IN_STACK): Define. + (BLOCK_REG_PADDING): Define. + * config/rs6000/rs6000.h (struct rs6000_args): Remove orig_nargs. + (PAD_VARARGS_DOWN): Define in terms of FUNCTION_ARG_PADDING. + * config/rs6000/rs6000.c (init_cumulative_args): Don't set orig_nargs. + (function_arg_padding): !AGGREGATE_PADDING_FIXED compatibility code. + Act on AGGREGATES_PAD_UPWARD_ALWAYS. + 2003-07-13 Aaron W. LaFramboise * config/i386/gthr-win32.c (__GTHREAD_HIDE_WIN32API): Define to 1. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index ed2a23ae760..414980ccf0d 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1547,7 +1547,7 @@ builtins.o : builtins.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(T $(RECOG_H) output.h typeclass.h hard-reg-set.h toplev.h hard-reg-set.h \ except.h $(TM_P_H) $(PREDICT_H) libfuncs.h real.h langhooks.h calls.o : calls.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) flags.h \ - $(EXPR_H) langhooks.h $(TARGET_H) \ + $(EXPR_H) $(OPTABS_H) langhooks.h $(TARGET_H) \ libfuncs.h $(REGS_H) toplev.h output.h function.h $(TIMEVAR_H) $(TM_P_H) cgraph.h except.h expmed.o : expmed.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \ flags.h insn-config.h $(EXPR_H) $(OPTABS_H) $(RECOG_H) real.h \ diff --git a/gcc/calls.c b/gcc/calls.c index d66ebd93d1c..ef4926ee770 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -27,6 +27,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "tree.h" #include "flags.h" #include "expr.h" +#include "optabs.h" #include "libfuncs.h" #include "function.h" #include "regs.h" @@ -934,22 +935,27 @@ store_unaligned_arguments_into_pseudos (struct arg_data *args, int num_actuals) < (unsigned int) MIN (BIGGEST_ALIGNMENT, BITS_PER_WORD))) { int bytes = int_size_in_bytes (TREE_TYPE (args[i].tree_value)); - int big_endian_correction = 0; - - args[i].n_aligned_regs - = args[i].partial ? args[i].partial - : (bytes + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD; + int nregs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + int endian_correction = 0; + args[i].n_aligned_regs = args[i].partial ? args[i].partial : nregs; args[i].aligned_regs = (rtx *) xmalloc (sizeof (rtx) * args[i].n_aligned_regs); - /* Structures smaller than a word are aligned to the least - significant byte (to the right). On a BYTES_BIG_ENDIAN machine, + /* Structures smaller than a word are normally aligned to the + least significant byte. On a BYTES_BIG_ENDIAN machine, this means we must skip the empty high order bytes when calculating the bit offset. 
*/ - if (BYTES_BIG_ENDIAN - && bytes < UNITS_PER_WORD) - big_endian_correction = (BITS_PER_WORD - (bytes * BITS_PER_UNIT)); + if (bytes < UNITS_PER_WORD +#ifdef BLOCK_REG_PADDING + && (BLOCK_REG_PADDING (args[i].mode, + TREE_TYPE (args[i].tree_value), 1) + == downward) +#else + && BYTES_BIG_ENDIAN +#endif + ) + endian_correction = BITS_PER_WORD - bytes * BITS_PER_UNIT; for (j = 0; j < args[i].n_aligned_regs; j++) { @@ -958,6 +964,8 @@ store_unaligned_arguments_into_pseudos (struct arg_data *args, int num_actuals) int bitsize = MIN (bytes * BITS_PER_UNIT, BITS_PER_WORD); args[i].aligned_regs[j] = reg; + word = extract_bit_field (word, bitsize, 0, 1, NULL_RTX, + word_mode, word_mode, BITS_PER_WORD); /* There is no need to restrict this code to loading items in TYPE_ALIGN sized hunks. The bitfield instructions can @@ -973,11 +981,8 @@ store_unaligned_arguments_into_pseudos (struct arg_data *args, int num_actuals) emit_move_insn (reg, const0_rtx); bytes -= bitsize / BITS_PER_UNIT; - store_bit_field (reg, bitsize, big_endian_correction, word_mode, - extract_bit_field (word, bitsize, 0, 1, NULL_RTX, - word_mode, word_mode, - BITS_PER_WORD), - BITS_PER_WORD); + store_bit_field (reg, bitsize, endian_correction, word_mode, + word, BITS_PER_WORD); } } } @@ -1580,34 +1585,48 @@ load_register_parameters (struct arg_data *args, int num_actuals, { rtx reg = ((flags & ECF_SIBCALL) ? args[i].tail_call_reg : args[i].reg); - int partial = args[i].partial; - int nregs; - if (reg) { + int partial = args[i].partial; + int nregs; + int size = 0; rtx before_arg = get_last_insn (); /* Set to non-negative if must move a word at a time, even if just one word (e.g, partial == 1 && mode == DFmode). Set to -1 if we just use a normal move insn. This value can be zero if the argument is a zero size structure with no fields. */ - nregs = (partial ? partial - : (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode - ? ((int_size_in_bytes (TREE_TYPE (args[i].tree_value)) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - : -1)); + nregs = -1; + if (partial) + nregs = partial; + else if (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode) + { + size = int_size_in_bytes (TREE_TYPE (args[i].tree_value)); + nregs = (size + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD; + } + else + size = GET_MODE_SIZE (args[i].mode); /* Handle calls that pass values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. */ if (GET_CODE (reg) == PARALLEL) - emit_group_load (reg, args[i].value, - int_size_in_bytes (TREE_TYPE (args[i].tree_value))); + { + tree type = TREE_TYPE (args[i].tree_value); + emit_group_load (reg, args[i].value, type, + int_size_in_bytes (type)); + } /* If simple case, just do move. If normal partial, store_one_arg has already loaded the register for us. In all other cases, load the register(s) from memory. */ - else if (nregs == -1) + else if (nregs == -1 +#ifdef BLOCK_REG_PADDING + && !(size < UNITS_PER_WORD + && (args[i].locate.where_pad + == (BYTES_BIG_ENDIAN ? upward : downward))) +#endif + ) emit_move_insn (reg, args[i].value); /* If we have pre-computed the values to put in the registers in @@ -1619,9 +1638,44 @@ load_register_parameters (struct arg_data *args, int num_actuals, args[i].aligned_regs[j]); else if (partial == 0 || args[i].pass_on_stack) - move_block_to_reg (REGNO (reg), - validize_mem (args[i].value), nregs, - args[i].mode); + { + rtx mem = validize_mem (args[i].value); + +#ifdef BLOCK_REG_PADDING + /* Handle case where we have a value that needs shifting + up to the msb. eg. 
a QImode value and we're padding
+		 upward on a BYTES_BIG_ENDIAN machine.  */
+	      if (nregs == -1)
+		{
+		  rtx ri = gen_rtx_REG (word_mode, REGNO (reg));
+		  rtx x;
+		  int shift = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
+		  x = expand_binop (word_mode, ashl_optab, mem,
+				    GEN_INT (shift), ri, 1, OPTAB_WIDEN);
+		  if (x != ri)
+		    emit_move_insn (ri, x);
+		}
+
+	      /* Handle a BLKmode that needs shifting.  */
+	      else if (nregs == 1 && size < UNITS_PER_WORD
+		       && args[i].locate.where_pad == downward)
+		{
+		  rtx tem = operand_subword_force (mem, 0, args[i].mode);
+		  rtx ri = gen_rtx_REG (word_mode, REGNO (reg));
+		  rtx x = gen_reg_rtx (word_mode);
+		  int shift = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
+		  optab dir = BYTES_BIG_ENDIAN ? lshr_optab : ashl_optab;
+
+		  emit_move_insn (x, tem);
+		  x = expand_binop (word_mode, dir, x, GEN_INT (shift),
+				    ri, 1, OPTAB_WIDEN);
+		  if (x != ri)
+		    emit_move_insn (ri, x);
+		}
+	      else
+#endif
+		move_block_to_reg (REGNO (reg), mem, nregs, args[i].mode);
+	    }

 	  /* When a parameter is a block, and perhaps in other cases, it is
 	     possible that it did a load from an argument slot that was
@@ -3144,7 +3198,7 @@ expand_call (tree exp, rtx target, int ignore)
 	    }

 	  if (! rtx_equal_p (target, valreg))
-	    emit_group_store (target, valreg,
+	    emit_group_store (target, valreg, TREE_TYPE (exp),
 			      int_size_in_bytes (TREE_TYPE (exp)));

 	  /* We can not support sibling calls for this case.  */
@@ -3989,7 +4043,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
       /* Handle calls that pass values in multiple non-contiguous
 	 locations.  The PA64 has examples of this for library calls.  */
       if (reg != 0 && GET_CODE (reg) == PARALLEL)
-	emit_group_load (reg, val, GET_MODE_SIZE (GET_MODE (val)));
+	emit_group_load (reg, val, NULL_TREE, GET_MODE_SIZE (GET_MODE (val)));
       else if (reg != 0 && partial == 0)
 	emit_move_insn (reg, val);

@@ -4093,7 +4147,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
       if (GET_CODE (valreg) == PARALLEL)
 	{
 	  temp = gen_reg_rtx (outmode);
-	  emit_group_store (temp, valreg, outmode);
+	  emit_group_store (temp, valreg, NULL_TREE, outmode);
 	  valreg = temp;
 	}

@@ -4136,7 +4190,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
 	{
 	  if (value == 0)
 	    value = gen_reg_rtx (outmode);
-	  emit_group_store (value, valreg, outmode);
+	  emit_group_store (value, valreg, NULL_TREE, outmode);
 	}
       else if (value != 0)
 	emit_move_insn (value, valreg);
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index c1c1edb4d30..4ef7382cee8 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -160,6 +160,10 @@

 #ifndef RS6000_BI_ARCH

+/* 64-bit PowerPC Linux is always big-endian.  */
+#undef TARGET_LITTLE_ENDIAN
+#define TARGET_LITTLE_ENDIAN 0
+
 /* 64-bit PowerPC Linux always has a TOC.  */
 #undef TARGET_TOC
 #define TARGET_TOC 1
@@ -232,6 +236,35 @@
 #undef JUMP_TABLES_IN_TEXT_SECTION
 #define JUMP_TABLES_IN_TEXT_SECTION TARGET_64BIT

+/* The linux ppc64 ABI isn't explicit on whether aggregates smaller
+   than a doubleword should be padded upward or downward.  You could
+   reasonably assume that they follow the normal rules for structure
+   layout treating the parameter area as any other block of memory,
+   then map the reg param area to registers.  ie. pad upward.
+   Setting both of the following defines results in this behaviour.
+   Setting just the first one will result in aggregates that fit in a
+   doubleword being padded downward, and others being padded upward.
+   Not a bad idea as this results in struct { int x; } being passed
+   the same way as an int.
*/ +#define AGGREGATE_PADDING_FIXED TARGET_64BIT +#define AGGREGATES_PAD_UPWARD_ALWAYS 0 + +/* We don't want anything in the reg parm area being passed on the + stack. */ +#define MUST_PASS_IN_STACK(MODE, TYPE) \ + ((TARGET_64BIT \ + && (TYPE) != 0 \ + && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST \ + || TREE_ADDRESSABLE (TYPE))) \ + || (!TARGET_64BIT \ + && default_must_pass_in_stack ((MODE), (TYPE)))) + +/* Specify padding for the last element of a block move between + registers and memory. FIRST is nonzero if this is the only + element. */ +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE)) + /* __throw will restore its own return address to be the same as the return address of the function that the throw is being made to. This is unfortunate, because we want to check the original diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 7ff59de6b76..fa296d5db94 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3702,8 +3702,6 @@ init_cumulative_args (cum, fntype, libname, incoming, libcall) else cum->nargs_prototype = 0; - cum->orig_nargs = cum->nargs_prototype; - /* Check for a longcall attribute. */ if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)) @@ -3742,8 +3740,47 @@ function_arg_padding (mode, type) enum machine_mode mode; tree type; { - if (type != 0 && AGGREGATE_TYPE_P (type)) - return upward; +#ifndef AGGREGATE_PADDING_FIXED +#define AGGREGATE_PADDING_FIXED 0 +#endif +#ifndef AGGREGATES_PAD_UPWARD_ALWAYS +#define AGGREGATES_PAD_UPWARD_ALWAYS 0 +#endif + + if (!AGGREGATE_PADDING_FIXED) + { + /* GCC used to pass structures of the same size as integer types as + if they were in fact integers, ignoring FUNCTION_ARG_PADDING. + ie. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were + passed padded downward, except that -mstrict-align further + muddied the water in that multi-component structures of 2 and 4 + bytes in size were passed padded upward. + + The following arranges for best compatibility with previous + versions of gcc, but removes the -mstrict-align dependency. */ + if (BYTES_BIG_ENDIAN) + { + HOST_WIDE_INT size = 0; + + if (mode == BLKmode) + { + if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) + size = int_size_in_bytes (type); + } + else + size = GET_MODE_SIZE (mode); + + if (size == 1 || size == 2 || size == 4) + return downward; + } + return upward; + } + + if (AGGREGATES_PAD_UPWARD_ALWAYS) + { + if (type != 0 && AGGREGATE_TYPE_P (type)) + return upward; + } /* This is the default definition. */ return (! BYTES_BIG_ENDIAN diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index e91571967d5..5ba18dce5cd 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1734,7 +1734,6 @@ typedef struct rs6000_args int fregno; /* next available FP register */ int vregno; /* next available AltiVec register */ int nargs_prototype; /* # args left in the current prototype */ - int orig_nargs; /* Original value of nargs_prototype */ int prototype; /* Whether a prototype was defined */ int stdarg; /* Whether function is a stdarg function. */ int call_cookie; /* Do special things for this call */ @@ -1878,13 +1877,8 @@ typedef struct rs6000_args #define EXPAND_BUILTIN_VA_ARG(valist, type) \ rs6000_va_arg (valist, type) -/* For AIX, the rule is that structures are passed left-aligned in - their stack slot. 
However, GCC does not presently do this:
-   structures which are the same size as integer types are passed
-   right-aligned, as if they were in fact integers.  This only
-   matters for structures of size 1 or 2, or 4 when TARGET_64BIT.
-   ABI_V4 does not use std_expand_builtin_va_arg.  */
-#define PAD_VARARGS_DOWN (TYPE_MODE (type) != BLKmode)
+#define PAD_VARARGS_DOWN \
+  (FUNCTION_ARG_PADDING (TYPE_MODE (type), type) == downward)

 /* Define this macro to be a nonzero value if the location where a function
    argument is passed depends on whether or not it is a named argument.  */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c9ca07d1682..2c82c4044f0 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3762,6 +3762,17 @@ controlled by @code{PARM_BOUNDARY}.  If this macro is not defined, all such
 arguments are padded down if @code{BYTES_BIG_ENDIAN} is true.
 @end defmac

+@defmac BLOCK_REG_PADDING (@var{mode}, @var{type}, @var{first})
+Specify padding for the last element of a block move between registers and
+memory.  @var{first} is nonzero if this is the only element.  Defining this
+macro allows better control of register function parameters on big-endian
+machines, without using @code{PARALLEL} rtl.  In particular,
+@code{MUST_PASS_IN_STACK} need not test padding and mode of types in
+registers, as there is no longer a ``wrong'' part of a register.  For example,
+a three-byte aggregate may be passed in the high part of a register if so
+required.
+@end defmac
+
 @defmac FUNCTION_ARG_BOUNDARY (@var{mode}, @var{type})
 If defined, a C expression that gives the alignment boundary, in bits,
 of an argument with the specified mode and type.  If it is not defined,
diff --git a/gcc/expr.c b/gcc/expr.c
index 1b7eab178d8..80efb3f459d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -2240,18 +2240,13 @@ gen_group_rtx (rtx orig)
   return gen_rtx_PARALLEL (GET_MODE (orig), gen_rtvec_v (length, tmps));
 }

-/* Emit code to move a block SRC to a block DST, where DST is non-consecutive
-   registers represented by a PARALLEL.  SSIZE represents the total size of
-   block SRC in bytes, or -1 if not known.  */
-/* ??? If SSIZE % UNITS_PER_WORD != 0, we make the blatant assumption that
-   the balance will be in what would be the low-order memory addresses, i.e.
-   left justified for big endian, right justified for little endian.  This
-   happens to be true for the targets currently using this support.  If this
-   ever changes, a new target macro along the lines of FUNCTION_ARG_PADDING
-   would be needed.  */
+/* Emit code to move a block ORIG_SRC of type TYPE to a block DST,
+   where DST is non-consecutive registers represented by a PARALLEL.
+   SSIZE represents the total size of block ORIG_SRC in bytes, or -1
+   if not known.  */

 void
-emit_group_load (rtx dst, rtx orig_src, int ssize)
+emit_group_load (rtx dst, rtx orig_src, tree type ATTRIBUTE_UNUSED, int ssize)
 {
   rtx *tmps, src;
   int start, i;
@@ -2279,7 +2274,17 @@ emit_group_load (rtx dst, rtx orig_src, int ssize)
       /* Handle trailing fragments that run over the size of the struct.  */
       if (ssize >= 0 && bytepos + (HOST_WIDE_INT) bytelen > ssize)
 	{
-	  shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT;
+	  /* Arrange to shift the fragment to where it belongs.
+	     extract_bit_field loads to the lsb of the reg.  */
+	  if (
+#ifdef BLOCK_REG_PADDING
+	      BLOCK_REG_PADDING (GET_MODE (orig_src), type, i == start)
+	      == (BYTES_BIG_ENDIAN ?
upward : downward) +#else + BYTES_BIG_ENDIAN +#endif + ) + shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT; bytelen = ssize - bytepos; if (bytelen <= 0) abort (); @@ -2304,7 +2309,8 @@ emit_group_load (rtx dst, rtx orig_src, int ssize) /* Optimize the access just a bit. */ if (GET_CODE (src) == MEM - && MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode) + && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (src)) + || MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode)) && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0 && bytelen == GET_MODE_SIZE (mode)) { @@ -2360,7 +2366,7 @@ emit_group_load (rtx dst, rtx orig_src, int ssize) bytepos * BITS_PER_UNIT, 1, NULL_RTX, mode, mode, ssize); - if (BYTES_BIG_ENDIAN && shift) + if (shift) expand_binop (mode, ashl_optab, tmps[i], GEN_INT (shift), tmps[i], 0, OPTAB_WIDEN); } @@ -2391,12 +2397,13 @@ emit_group_move (rtx dst, rtx src) XEXP (XVECEXP (src, 0, i), 0)); } -/* Emit code to move a block SRC to a block DST, where SRC is non-consecutive - registers represented by a PARALLEL. SSIZE represents the total size of - block DST, or -1 if not known. */ +/* Emit code to move a block SRC to a block ORIG_DST of type TYPE, + where SRC is non-consecutive registers represented by a PARALLEL. + SSIZE represents the total size of block ORIG_DST, or -1 if not + known. */ void -emit_group_store (rtx orig_dst, rtx src, int ssize) +emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED, int ssize) { rtx *tmps, dst; int start, i; @@ -2440,8 +2447,8 @@ emit_group_store (rtx orig_dst, rtx src, int ssize) the temporary. */ temp = assign_stack_temp (GET_MODE (dst), ssize, 0); - emit_group_store (temp, src, ssize); - emit_group_load (dst, temp, ssize); + emit_group_store (temp, src, type, ssize); + emit_group_load (dst, temp, type, ssize); return; } else if (GET_CODE (dst) != MEM && GET_CODE (dst) != CONCAT) @@ -2462,7 +2469,16 @@ emit_group_store (rtx orig_dst, rtx src, int ssize) /* Handle trailing fragments that run over the size of the struct. */ if (ssize >= 0 && bytepos + (HOST_WIDE_INT) bytelen > ssize) { - if (BYTES_BIG_ENDIAN) + /* store_bit_field always takes its value from the lsb. + Move the fragment to the lsb if it's not already there. */ + if ( +#ifdef BLOCK_REG_PADDING + BLOCK_REG_PADDING (GET_MODE (orig_dst), type, i == start) + == (BYTES_BIG_ENDIAN ? upward : downward) +#else + BYTES_BIG_ENDIAN +#endif + ) { int shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT; expand_binop (mode, ashr_optab, tmps[i], GEN_INT (shift), @@ -2495,7 +2511,8 @@ emit_group_store (rtx orig_dst, rtx src, int ssize) /* Optimize the access just a bit. */ if (GET_CODE (dest) == MEM - && MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode) + && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (dest)) + || MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode)) && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0 && bytelen == GET_MODE_SIZE (mode)) emit_move_insn (adjust_address (dest, mode, bytepos), tmps[i]); @@ -4076,7 +4093,7 @@ emit_push_insn (rtx x, enum machine_mode mode, tree type, rtx size, /* Handle calls that pass values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. */ if (GET_CODE (reg) == PARALLEL) - emit_group_load (reg, x, -1); /* ??? size? */ + emit_group_load (reg, x, type, -1); else move_block_to_reg (REGNO (reg), x, partial, mode); } @@ -4276,7 +4293,8 @@ expand_assignment (tree to, tree from, int want_value, /* Handle calls that return values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. 
*/ if (GET_CODE (to_rtx) == PARALLEL) - emit_group_load (to_rtx, value, int_size_in_bytes (TREE_TYPE (from))); + emit_group_load (to_rtx, value, TREE_TYPE (from), + int_size_in_bytes (TREE_TYPE (from))); else if (GET_MODE (to_rtx) == BLKmode) emit_block_move (to_rtx, value, expr_size (from), BLOCK_OP_NORMAL); else @@ -4310,7 +4328,8 @@ expand_assignment (tree to, tree from, int want_value, temp = expand_expr (from, 0, GET_MODE (to_rtx), 0); if (GET_CODE (to_rtx) == PARALLEL) - emit_group_load (to_rtx, temp, int_size_in_bytes (TREE_TYPE (from))); + emit_group_load (to_rtx, temp, TREE_TYPE (from), + int_size_in_bytes (TREE_TYPE (from))); else emit_move_insn (to_rtx, temp); @@ -4720,7 +4739,8 @@ store_expr (tree exp, rtx target, int want_value) /* Handle calls that return values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. */ else if (GET_CODE (target) == PARALLEL) - emit_group_load (target, temp, int_size_in_bytes (TREE_TYPE (exp))); + emit_group_load (target, temp, TREE_TYPE (exp), + int_size_in_bytes (TREE_TYPE (exp))); else if (GET_MODE (temp) == BLKmode) emit_block_move (target, temp, expr_size (exp), (want_value & 2 @@ -9268,7 +9288,7 @@ expand_expr (tree exp, rtx target, enum machine_mode tmode, enum expand_modifier /* Handle calls that pass values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. */ - emit_group_store (memloc, op0, + emit_group_store (memloc, op0, inner_type, int_size_in_bytes (inner_type)); else emit_move_insn (memloc, op0); diff --git a/gcc/expr.h b/gcc/expr.h index ddf8505fa87..ba84fd02ae8 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -68,7 +68,6 @@ enum expand_modifier {EXPAND_NORMAL = 0, EXPAND_STACK_PARM = 2, EXPAND_SUM, enum direction {none, upward, downward}; -#ifdef TREE_CODE /* Don't lose if tree.h not included. */ /* Structure to record the size of a sequence of arguments as the sum of a tree-expression and a constant. This structure is also used to store offsets from the stack, which might be negative, @@ -96,8 +95,9 @@ struct locate_and_pad_arg_data /* The amount that the stack pointer needs to be adjusted to force alignment for the next argument. */ struct args_size alignment_pad; + /* Which way we should pad this arg. */ + enum direction where_pad; }; -#endif /* Add the value of the tree INC to the `struct args_size' TO. */ @@ -427,7 +427,7 @@ extern rtx gen_group_rtx (rtx); /* Load a BLKmode value into non-consecutive registers represented by a PARALLEL. */ -extern void emit_group_load (rtx, rtx, int); +extern void emit_group_load (rtx, rtx, tree, int); /* Move a non-consecutive group of registers represented by a PARALLEL into a non-consecutive group of registers represented by a PARALLEL. */ @@ -435,12 +435,10 @@ extern void emit_group_move (rtx, rtx); /* Store a BLKmode value from non-consecutive registers represented by a PARALLEL. */ -extern void emit_group_store (rtx, rtx, int); +extern void emit_group_store (rtx, rtx, tree, int); -#ifdef TREE_CODE /* Copy BLKmode object from a set of registers. */ extern rtx copy_blkmode_from_reg (rtx, rtx, tree); -#endif /* Mark REG as holding a parameter for the next CALL_INSN. */ extern void use_reg (rtx *, rtx); @@ -490,7 +488,6 @@ extern rtx emit_move_insn_1 (rtx, rtx); and return an rtx to address the beginning of the block. */ extern rtx push_block (rtx, int, int); -#ifdef TREE_CODE /* Generate code to push something onto the stack, given its mode and type. 
*/ extern void emit_push_insn (rtx, enum machine_mode, tree, rtx, unsigned int, int, rtx, int, rtx, rtx, int, rtx); @@ -503,7 +500,6 @@ extern rtx expand_assignment (tree, tree, int, int); If SUGGEST_REG is nonzero, copy the value through a register and return that register, if that is possible. */ extern rtx store_expr (tree, rtx, int); -#endif /* Given an rtx that may include add and multiply operations, generate them as insns and return a pseudo-reg containing the value. @@ -535,7 +531,6 @@ extern void clear_pending_stack_adjust (void); /* Pop any previously-pushed arguments that have not been popped yet. */ extern void do_pending_stack_adjust (void); -#ifdef TREE_CODE /* Return the tree node and offset if a given argument corresponds to a string constant. */ extern tree string_constant (tree, tree *); @@ -549,7 +544,6 @@ extern void jumpif (tree, rtx); /* Generate code to evaluate EXP and jump to IF_FALSE_LABEL if the result is zero, or IF_TRUE_LABEL if the result is one. */ extern void do_jump (tree, rtx, rtx); -#endif /* Generate rtl to compare two rtx's, will call emit_cmp_insn. */ extern rtx compare_from_rtx (rtx, rtx, enum rtx_code, int, enum machine_mode, @@ -566,7 +560,6 @@ extern int try_tablejump (tree, tree, tree, tree, rtx, rtx); extern unsigned int case_values_threshold (void); -#ifdef TREE_CODE /* rtl.h and tree.h were included. */ /* Return an rtx for the size in bytes of the value of an expr. */ extern rtx expr_size (tree); @@ -592,10 +585,13 @@ extern rtx prepare_call_address (rtx, tree, rtx *, int, int); extern rtx expand_call (tree, rtx, int); +#ifdef TREE_CODE extern rtx expand_shift (enum tree_code, enum machine_mode, rtx, tree, rtx, int); extern rtx expand_divmod (int, enum tree_code, enum machine_mode, rtx, rtx, rtx, int); +#endif + extern void locate_and_pad_parm (enum machine_mode, tree, int, int, tree, struct args_size *, struct locate_and_pad_arg_data *); @@ -608,7 +604,6 @@ extern rtx label_rtx (tree); list of its containing function (i.e. it is treated as reachable even if how is not obvious). */ extern rtx force_label_rtx (tree); -#endif /* Indicate how an input argument register was promoted. */ extern rtx promoted_input_arg (unsigned int, enum machine_mode *, int *); @@ -691,7 +686,6 @@ extern rtx widen_memory_access (rtx, enum machine_mode, HOST_WIDE_INT); valid address. */ extern rtx validize_mem (rtx); -#ifdef TREE_CODE /* Given REF, either a MEM or a REG, and T, either the type of X or the expression corresponding to REF, set RTX_UNCHANGING_P if appropriate. */ @@ -706,7 +700,6 @@ extern void set_mem_attributes (rtx, tree, int); we alter MEM_OFFSET according to T then we should subtract BITPOS expecting that it'll be added back in later. */ extern void set_mem_attributes_minus_bitpos (rtx, tree, int, HOST_WIDE_INT); -#endif /* Assemble the static constant template for function entry trampolines. */ extern rtx assemble_trampoline_template (void); @@ -738,10 +731,8 @@ extern rtx force_reg (enum machine_mode, rtx); /* Return given rtx, copied into a new temp reg if it was in memory. */ extern rtx force_not_mem (rtx); -#ifdef TREE_CODE /* Return mode and signedness to use when object is promoted. */ extern enum machine_mode promote_mode (tree, enum machine_mode, int *, int); -#endif /* Remove some bytes from the stack. An rtx says how many. 
*/ extern void adjust_stack (rtx); @@ -812,9 +803,7 @@ extern void do_jump_by_parts_equality_rtx (rtx, rtx, rtx); extern void do_jump_by_parts_greater_rtx (enum machine_mode, int, rtx, rtx, rtx, rtx); -#ifdef TREE_CODE /* Don't lose if tree.h not included. */ extern void mark_seen_cases (tree, unsigned char *, HOST_WIDE_INT, int); -#endif extern int vector_mode_valid_p (enum machine_mode); diff --git a/gcc/function.c b/gcc/function.c index 301d59d498d..c3f4dc604d4 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -4507,6 +4507,8 @@ assign_parms (tree fndecl) offset_rtx)); set_mem_attributes (stack_parm, parm, 1); + if (entry_parm && MEM_ATTRS (stack_parm)->align < PARM_BOUNDARY) + set_mem_align (stack_parm, PARM_BOUNDARY); /* Set also REG_ATTRS if parameter was passed in a register. */ if (entry_parm) @@ -4538,6 +4540,7 @@ assign_parms (tree fndecl) locations. The Irix 6 ABI has examples of this. */ if (GET_CODE (entry_parm) == PARALLEL) emit_group_store (validize_mem (stack_parm), entry_parm, + TREE_TYPE (parm), int_size_in_bytes (TREE_TYPE (parm))); else @@ -4644,7 +4647,12 @@ assign_parms (tree fndecl) Set DECL_RTL to that place. */ - if (nominal_mode == BLKmode || GET_CODE (entry_parm) == PARALLEL) + if (nominal_mode == BLKmode +#ifdef BLOCK_REG_PADDING + || (locate.where_pad == (BYTES_BIG_ENDIAN ? upward : downward) + && GET_MODE_SIZE (promoted_mode) < UNITS_PER_WORD) +#endif + || GET_CODE (entry_parm) == PARALLEL) { /* If a BLKmode arrives in registers, copy it to a stack slot. Handle calls that pass values in multiple non-contiguous @@ -4680,7 +4688,7 @@ assign_parms (tree fndecl) /* Handle calls that pass values in multiple non-contiguous locations. The Irix 6 ABI has examples of this. */ if (GET_CODE (entry_parm) == PARALLEL) - emit_group_store (mem, entry_parm, size); + emit_group_store (mem, entry_parm, TREE_TYPE (parm), size); else if (size == 0) ; @@ -4692,7 +4700,13 @@ assign_parms (tree fndecl) enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); - if (mode != BLKmode) + if (mode != BLKmode +#ifdef BLOCK_REG_PADDING + && (size == UNITS_PER_WORD + || (BLOCK_REG_PADDING (mode, TREE_TYPE (parm), 1) + != (BYTES_BIG_ENDIAN ? upward : downward))) +#endif + ) { rtx reg = gen_rtx_REG (mode, REGNO (entry_parm)); emit_move_insn (change_address (mem, mode, 0), reg); @@ -4703,7 +4717,13 @@ assign_parms (tree fndecl) to memory. Note that the previous test doesn't handle all cases (e.g. SIZE == 3). */ else if (size != UNITS_PER_WORD - && BYTES_BIG_ENDIAN) +#ifdef BLOCK_REG_PADDING + && (BLOCK_REG_PADDING (mode, TREE_TYPE (parm), 1) + == downward) +#else + && BYTES_BIG_ENDIAN +#endif + ) { rtx tem, x; int by = (UNITS_PER_WORD - size) * BITS_PER_UNIT; @@ -5367,6 +5387,7 @@ locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs, = type ? 
size_in_bytes (type) : size_int (GET_MODE_SIZE (passed_mode)); where_pad = FUNCTION_ARG_PADDING (passed_mode, type); boundary = FUNCTION_ARG_BOUNDARY (passed_mode, type); + locate->where_pad = where_pad; #ifdef ARGS_GROW_DOWNWARD locate->slot_offset.constant = -initial_offset_ptr->constant; @@ -7036,6 +7057,7 @@ expand_function_end (void) emit_group_move (real_decl_rtl, decl_rtl); else emit_group_load (real_decl_rtl, decl_rtl, + TREE_TYPE (decl_result), int_size_in_bytes (TREE_TYPE (decl_result))); } else diff --git a/gcc/stmt.c b/gcc/stmt.c index 334e751bcde..a720691404f 100644 --- a/gcc/stmt.c +++ b/gcc/stmt.c @@ -2955,7 +2955,7 @@ expand_value_return (rtx val) val = convert_modes (mode, old_mode, val, unsignedp); #endif if (GET_CODE (return_reg) == PARALLEL) - emit_group_load (return_reg, val, int_size_in_bytes (type)); + emit_group_load (return_reg, val, type, int_size_in_bytes (type)); else emit_move_insn (return_reg, val); }
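
Illustrative footnote (not part of the patch): the placement rule that
BLOCK_REG_PADDING encodes can be sketched in plain C.  When a value
smaller than a word is padded upward on a big-endian target, the new
code in load_register_parameters shifts it from the least significant
end of the register, where a normal load leaves it, to the most
significant end.  A minimal sketch, assuming UNITS_PER_WORD of 8 and
BITS_PER_UNIT of 8 as on powerpc64; shift_to_msb_end is a hypothetical
helper for illustration, not a GCC function:

#include <stdio.h>
#include <stdint.h>

/* Assumed target parameters: 64-bit words, 8-bit units.  */
#define UNITS_PER_WORD 8
#define BITS_PER_UNIT 8

/* Mirror the ashl_optab shift emitted by load_register_parameters:
   move a SIZE-byte value from the lsb end of a word to the msb end,
   where a pad-upward big-endian ABI expects it.  */
static uint64_t
shift_to_msb_end (uint64_t val, int size)
{
  int shift = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
  return val << shift;
}

int
main (void)
{
  /* A three-byte aggregate 0x11 0x22 0x33 lands in the high part of
     the doubleword register; the low five bytes are padding.  */
  printf ("%016llx\n",
	  (unsigned long long) shift_to_msb_end (0x112233, 3));
  return 0;
}

This prints 1122330000000000, matching the tm.texi remark that a
three-byte aggregate may be passed in the high part of a register.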