Add CRIS atomic patterns for 1, 2, and 4 bytes.

* config/cris/cris.c (cris_emit_trap_for_misalignment): New function.
	* config/cris/cris-protos.h: Declare it.
	* config/cris/cris.h [!TARGET_DEFAULT, TARGET_CPU_DEFAULT == 32] 
	(TARGET_DEFAULT): Add alignment by 32.
	[!TARGET_DEFAULT, TARGET_CPU_DEFAULT == 10] (TARGET_DEFAULT): New
	case, as TARGET_CPU_DEFAULT == 0 but with alignment as for
	TARGET_CPU_DEFAULT == 32. 
	(TARGET_TRAP_UNALIGNED_ATOMIC): New macro.
	* config/cris/cris.md: Include sync.md.  Avoid allocating specific
	numbers by replacing the define_constants for all UNSPECs with the
	equivalent define_c_enum construct.
	* config/cris/cris.opt (mtrap-unaligned-atomic): New option.
	* config/cris/sync.md: New file.

From-SVN: r188096
This commit is contained in:
Hans-Peter Nilsson 2012-06-01 05:49:16 +00:00 committed by Hans-Peter Nilsson
parent 183d6a7e72
commit 21ed44440f
7 changed files with 379 additions and 14 deletions

View file

@ -1,3 +1,20 @@
2012-06-01 Hans-Peter Nilsson <hp@axis.com>
Add CRIS atomic patterns for 1, 2, and 4 bytes.
* config/cris/cris.c (cris_emit_trap_for_misalignment): New function.
* config/cris/cris-protos.h: Declare it.
* config/cris/cris.h [!TARGET_DEFAULT, TARGET_CPU_DEFAULT == 32]
(TARGET_DEFAULT): Add alignment by 32.
[!TARGET_DEFAULT, TARGET_CPU_DEFAULT == 10] (TARGET_DEFAULT): New
case, as TARGET_CPU_DEFAULT == 0 but with alignment as for
TARGET_CPU_DEFAULT == 32.
(TARGET_TRAP_UNALIGNED_ATOMIC): New macro.
* config/cris/cris.md: Include sync.md. Avoid allocating specific
numbers by replacing the define_constants for all UNSPECs with the
equivalent define_c_enum construct.
* config/cris/cris.opt (mtrap-unaligned-atomic): New option.
* config/cris/sync.md: New file.
2012-05-31 Matt Turner <mattst88@gmail.com>
* config/mips/4600.md (r4600_imul_si): Rename from r4600_imul.

View file

@ -49,6 +49,7 @@ extern rtx cris_gen_movem_load (rtx, rtx, int);
extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
extern void cris_expand_pic_call_address (rtx *);
extern void cris_order_for_addsi3 (rtx *, int);
extern void cris_emit_trap_for_misalignment (rtx);
#endif /* RTX_CODE */
extern void cris_asm_output_label_ref (FILE *, char *);
extern void cris_target_asm_named_section (const char *, unsigned int, tree);

View file

@ -1922,6 +1922,39 @@ cris_simple_epilogue (void)
return true;
}
/* Emit checking that MEM is aligned for an access in MODE, failing
that, executing a "break 8" (or call to abort, if "break 8" is
disabled). */
void
cris_emit_trap_for_misalignment (rtx mem)
{
rtx addr, reg, ok_label, and, jmp;
int natural_alignment;
gcc_assert (MEM_P (mem));
/* "Natural" alignment of an N-byte access is N bytes: the low bits
of the address below GET_MODE_SIZE must be zero.  */
natural_alignment = GET_MODE_SIZE (GET_MODE (mem));
addr = XEXP (mem, 0);
/* Force the address into a register so we can test its low bits.  */
reg = force_reg (Pmode, addr);
ok_label = gen_label_rtx ();
/* This will yield a btstq without a separate register used, usually -
with the exception for PRE hoisting the "and" but not the branch
around the trap: see gcc.dg/target/cris/sync-3s.c. */
and = gen_rtx_AND (Pmode, reg, GEN_INT (natural_alignment - 1));
/* Jump to OK_LABEL when the masked low bits are all zero (the address
is naturally aligned); otherwise fall through into the trap.  */
emit_cmp_and_jump_insns (force_reg (SImode, and), const0_rtx, EQ,
NULL_RTX, Pmode, 1, ok_label);
jmp = get_last_insn ();
gcc_assert (JUMP_P (jmp));
/* While this isn't mudflap, it is a similar kind of assertion.
If PRED_MUDFLAP stops working, use something else or introduce a
more suitable assertion predication type. */
predict_insn_def (jmp, PRED_MUDFLAP, TAKEN);
/* Expands to "break 8" or a call to abort, per target settings (see
the function comment above).  */
expand_builtin_trap ();
emit_label (ok_label);
}
/* Expand a return insn (just one insn) marked as using SRP or stack
slot depending on parameter ON_STACK. */

View file

@ -286,15 +286,25 @@ extern int cris_cpu_version;
#define TARGET_CPU_DEFAULT CRIS_CPU_BASE
#endif
/* Default target_flags if no switches specified. */
/* Default target_flags if no switches specified.
The alignment-by-32 is to make builtin atomic support for v10 and v32
work for *-elf for types without specified alignment (like plain
"int"). See top comment in sync.md. */
#ifndef TARGET_DEFAULT
# if TARGET_CPU_DEFAULT == 32
# define TARGET_DEFAULT \
(MASK_STACK_ALIGN \
+ MASK_CONST_ALIGN + MASK_DATA_ALIGN \
+ MASK_ALIGN_BY_32 \
+ MASK_PROLOGUE_EPILOGUE)
# else /* 10 */
# define TARGET_DEFAULT \
# elif TARGET_CPU_DEFAULT == 10
# define TARGET_DEFAULT \
(MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \
+ MASK_CONST_ALIGN + MASK_DATA_ALIGN \
+ MASK_ALIGN_BY_32 \
+ MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG)
# else /* 0 */
# define TARGET_DEFAULT \
(MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \
+ MASK_CONST_ALIGN + MASK_DATA_ALIGN \
+ MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG)
@ -314,6 +324,16 @@ extern int cris_cpu_version;
#define TARGET_TRAP_USING_BREAK8 \
(cris_trap_using_break8 == 2 ? TARGET_HAS_BREAK : cris_trap_using_break8)
/* The < v10 atomics turn off interrupts, so they don't need alignment.
Incidentally, by default alignment is off there causing variables to
be default unaligned all over, so we'd have to make support
libraries use a proper atomic type (instead of "int"), one we'd
specify as aligned. */
#define TARGET_TRAP_UNALIGNED_ATOMIC \
(cris_trap_unaligned_atomic == 2 \
? (TARGET_V32 || cris_cpu_version == 10) \
: cris_trap_unaligned_atomic)
/* Node: Storage Layout */
#define BITS_BIG_ENDIAN 0

View file

@ -55,46 +55,46 @@
;; The movsi for a gotless symbol could be split (post reload).
(define_constants
(define_c_enum ""
[
;; PLT reference from call expansion: operand 0 is the address,
;; the mode is VOIDmode. Always wrapped in CONST.
;; The value is relative to the GOT.
(CRIS_UNSPEC_PLT_GOTREL 0)
CRIS_UNSPEC_PLT_GOTREL
;; PLT reference from call expansion: operand 0 is the address,
;; the mode is VOIDmode. Always wrapped in CONST.
;; The value is relative to the PC. It's arch-dependent whether
;; the offset counts from the start or the end of the current item.
(CRIS_UNSPEC_PLT_PCREL 1)
CRIS_UNSPEC_PLT_PCREL
;; The address of the global offset table as a source operand.
(CRIS_UNSPEC_GOT 2)
CRIS_UNSPEC_GOT
;; The offset from the global offset table to the operand.
(CRIS_UNSPEC_GOTREL 3)
CRIS_UNSPEC_GOTREL
;; The PC-relative offset to the operand. It's arch-dependent whether
;; the offset counts from the start or the end of the current item.
(CRIS_UNSPEC_PCREL 4)
CRIS_UNSPEC_PCREL
;; The index into the global offset table of a symbol, while
;; also generating a GOT entry for the symbol.
(CRIS_UNSPEC_GOTREAD 5)
CRIS_UNSPEC_GOTREAD
;; Similar to CRIS_UNSPEC_GOTREAD, but also generating a PLT entry.
(CRIS_UNSPEC_PLTGOTREAD 6)
CRIS_UNSPEC_PLTGOTREAD
;; Condition for v32 casesi jump, since it needs to have if_then_else
;; form with register as one branch and default label as other.
;; Operand 0 is const_int 0.
(CRIS_UNSPEC_CASESI 7)
CRIS_UNSPEC_CASESI
;; Stack frame deallocation barrier.
(CRIS_UNSPEC_FRAME_DEALLOC 8)
CRIS_UNSPEC_FRAME_DEALLOC
;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1...
(CRIS_UNSPEC_SWAP_BITS 9)
CRIS_UNSPEC_SWAP_BITS
])
;; Register numbers.
@ -4165,6 +4165,8 @@
3 [(match_dup 0)
(match_dup 1)]))]
"")
(include "sync.md")
;; Splits for all cases in side-effect insns where (possibly after reload
;; and register allocation) rx and ry in [rx=ry+i] are equal.

View file

@ -179,6 +179,10 @@ mtrap-using-break8
Target Report Var(cris_trap_using_break8) Init(2)
Emit traps as \"break 8\", default for CRIS v3 and up. If disabled, calls to abort() are used.
mtrap-unaligned-atomic
Target Report Var(cris_trap_unaligned_atomic) Init(2)
Emit checks causing \"break 8\" instructions to execute when applying atomic builtins on misaligned memory.
; TARGET_SVINTO: Currently this just affects alignment. FIXME:
; Redundant with TARGET_ALIGN_BY_32, or put machine stuff here?
; This and the others below could just as well be variables and

288
gcc/config/cris/sync.md Normal file
View file

@ -0,0 +1,288 @@
;; GCC machine description for CRIS atomic memory sequences.
;; Copyright (C) 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; The CRIS atomic support yields code in three flavors, depending on
;; the CPU for which code is generated:
;;
;; - Plain old CRIS v0 (..v8)
;; - CRIS v10 (as used in ETRAX 100 LX)
;; - CRIS v32 (as used in ETRAX FS)
;;
;; The last two alternatives are similar, of LL/SC type. They may
;; fail for other reasons; an exception, a cache miss or a bus request
;; from other parts of the system. The difference between them is
;; just in what condition-codes are used to track LL and success or
;; failure for the store. See the chapter on integral read-write
;; operations, chapter 1.13 in "ETRAX 100LX Programmers Manual",
;; <http://www.axis.com/files/tech_notes/etrax_100lx_prog_man-050519.pdf>
;; and chapter 2.1 in "ETRAX FS Designer's reference",
;; <http://www.axis.com/files/manuals/etrax_fs_des_ref-070821.pdf>.
;; Note that the datum being stored has to be contained fully within a
;; cache-line to be integral. A failure to store the data integrally
;; will be flagged, but the store may still have happened in part,
;; which translates most usefully into the data having to be
;; "naturally aligned" to work. Natural alignment is verified in the
;; generated code and will by default cause for unaligned pointers a
;; "break 8" to be executed or optionally a call to abort(). Beware
;; that options -m16bit and -m8bit may cause data to be unaligned
;; where it was otherwise aligned. Data has a better chance of being
;; aligned if it is declared with e.g. __attribute__ ((__align__ (4))).
;;
;; The "plain old v0..v8 flavor" just assumes there's a single CPU in
;; the system, that no other parts of the system have access to memory
;; used for atomic accesses and since there's no user mode without
;; access to interrupt flags (another assumption), it just turns off
;; interrupts while doing the access. Here, alignment is neither
;; required nor asserted.
;; Unspec-volatile markers for the atomic patterns below:
;; - CRIS_UNSPEC_ATOMIC_OP wraps the loaded memory value in the
;;   fetch-and-operate insn.
;; - CRIS_UNSPEC_ATOMIC_SWAP_MEM marks the conditional store of the
;;   compare-and-swap insn.
;; - CRIS_UNSPEC_ATOMIC_SWAP_BOOL marks its boolean success result.
(define_c_enum ""
[
CRIS_UNSPEC_ATOMIC_OP
CRIS_UNSPEC_ATOMIC_SWAP_MEM
CRIS_UNSPEC_ATOMIC_SWAP_BOOL
])
;; Bit number of the interrupt-enable flag in $ccr; tested with btstq
;; in the pre-v10 sequences to decide whether interrupts must be
;; re-enabled after the operation.
(define_constants [(CRIS_CCR_INTERRUPT_BIT 5)])
;; We use "mult" as a placeholder for "nand" (which does not have a
;; separate binary rtx operation) so we can use an iterator in the
;; define_expand and define_insn and avoid having a separate
;; mostly-identical copy. You will see the "mult" operator in rtl
;; dumps, but it shouldn't matter as its use has one of its operands
;; inside an unspec_volatile.
(define_code_iterator atomic_op [plus minus ior and xor mult])
;; Maps each rtx code to the <atomic_op_name> part of the pattern names.
(define_code_attr atomic_op_name
[(plus "add") (minus "sub") (and "and") (ior "or") (xor "xor") (mult "nand")])
;; Pairs of these are used to insert the "not" after the "and" for nand.
(define_code_attr atomic_op_mnem_pre ;; Upper-case only to simplify testing.
[(plus "Add.d") (minus "Sub.d") (and "And.d") (ior "Or.d") (xor "Xor")
(mult "aNd.d")])
;; Emitted after the operation proper; only nand needs the extra "not %3".
(define_code_attr atomic_op_mnem_post_op3
[(plus "") (minus "") (and "") (ior "") (xor "") (mult "not %3\;")])
;; Expander for the standard atomic_fetch_<op> named patterns.
;; Operand 0 is the output register (the memory value before the
;; operation), operand 1 the memory location, operand 2 the second
;; operand of the operation and operand 3 the memory model (const_int),
;; used only for the fences emitted around the single RMW insn.
(define_expand "atomic_fetch_<atomic_op_name><mode>"
[(match_operand:BWD 0 "register_operand")
(match_operand:BWD 1 "memory_operand")
(match_operand:BWD 2 "register_operand")
(match_operand 3)
(atomic_op:BWD (match_dup 0) (match_dup 1))]
""
{
/* A single byte is always naturally aligned, so only HImode and
SImode accesses need the run-time alignment check; see the top
comment on integrality and natural alignment.  */
if (<MODE>mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC)
cris_emit_trap_for_misalignment (operands[1]);
expand_mem_thread_fence (INTVAL (operands[3]));
emit_insn (gen_cris_atomic_fetch_<atomic_op_name><mode>_1 (operands[0],
operands[1],
operands[2]));
expand_mem_thread_fence (INTVAL (operands[3]));
DONE;
})
;; The fetch-and-operate insn proper.  Operand 0 receives the old
;; memory value, operand 3 is a scratch register.  One of three asm
;; sequences is emitted depending on CPU version (see top comment).
(define_insn "cris_atomic_fetch_<atomic_op_name><mode>_1"
[(set (match_operand:BWD 1 "memory_operand" "+Q")
(atomic_op:BWD
(unspec_volatile:BWD [(match_dup 1)] CRIS_UNSPEC_ATOMIC_OP)
;; FIXME: relax this for plus, minus, and, ior.
(match_operand:BWD 2 "register_operand" "r")))
(set (match_operand:BWD 0 "register_operand" "=&r")
(match_dup 1))
(clobber (match_scratch:SI 3 "=&r"))]
""
{
/* Can't be too sure; better ICE if this happens. */
gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1]));
if (TARGET_V32)
/* LL/SC flavor: "ax" marks the following store as the conditional
one; loop back while the store did not complete integrally (the
flag tested differs between v32 and v10 — see top comment).  */
return
"clearf p\n"
".Lsync.%=:\;"
"move<m> %1,%0\;"
"move.d %0,%3\;"
"<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
"ax\;"
"move<m> %3,%1\;"
"bcs .Lsync.%=\;"
"clearf p";
else if (cris_cpu_version == 10)
/* Same structure as v32, but with the v10 flag/clearf variant.  */
return
"clearf\n"
".Lsync.%=:\;"
"move<m> %1,%0\;"
"move.d %0,%3\;"
"<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
"ax\;"
"move<m> %3,%1\;"
"bwf .Lsync.%=\;"
"clearf";
else
{
/* This one is for CRIS versions without load-locked-store-conditional
machinery; assume single-core-non-shared-memory without user
mode/supervisor mode distinction, and just disable interrupts
while performing the operation.
Rather than making this pattern more complex by freeing another
register or stack position to save condition codes (the value
of the interrupt-enabled bit), we check whether interrupts were
enabled before we disabled them and branch to a version
with/without afterwards re-enabling them. */
rtx ops[5];
/* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT. */
memcpy (ops, operands, sizeof(ops));
ops[4] = GEN_INT (CRIS_CCR_INTERRUPT_BIT);
output_asm_insn ("move $ccr,%3\;"
"di\;"
"move<m> %1,%0\;"
"btstq %4,%3",
ops);
/* NB: the returned template is expanded against the unmodified
OPERANDS array, not the local OPS copy, so %3 below is again the
scratch register.  */
return
"bmi .Lsync.irqon.%=\;"
"move.d %0,%3\;"
"<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
"ba .Lsync.irqoff.%=\;"
"move<m> %3,%1\n"
".Lsync.irqon.%=:\;"
"<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
"move<m> %3,%1\;"
"ei\n"
".Lsync.irqoff.%=:";
}
})
;; This pattern is more-or-less assumed to always exist if any of the
;; other atomic patterns exist (see e.g. comment at the
;; can_compare_and_swap_p call in omp-low.c, 4.8 era). We'd slightly
;; prefer atomic_exchange<mode> over this, but having both would be
;; redundant.
;; This pattern is more-or-less assumed to always exist if any of the
;; other atomic patterns exist (see e.g. comment at the
;; can_compare_and_swap_p call in omp-low.c, 4.8 era). We'd slightly
;; prefer atomic_exchange<mode> over this, but having both would be
;; redundant.
;; Standard operand layout: 0 = boolean success output, 1 = old memory
;; value output, 2 = memory, 3 = expected value, 4 = desired value.
;; Operand 6 (success memory model) drives the surrounding fences;
;; operands 5 (is_weak) and 7 (failure model) are accepted but unused.
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand")
(match_operand:BWD 1 "register_operand")
(match_operand:BWD 2 "memory_operand")
(match_operand:BWD 3 "general_operand")
(match_operand:BWD 4 "register_operand")
(match_operand 5)
(match_operand 6)
(match_operand 7)]
""
{
/* As for the fetch-and-operate expanders: only accesses wider than a
byte need the run-time alignment check.  */
if (<MODE>mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC)
cris_emit_trap_for_misalignment (operands[2]);
expand_mem_thread_fence (INTVAL (operands[6]));
emit_insn (gen_cris_atomic_compare_and_swap<mode>_1 (operands[0],
operands[1],
operands[2],
operands[3],
operands[4]));
expand_mem_thread_fence (INTVAL (operands[6]));
DONE;
})
;; Compare-and-swap insn proper.  Sets operand 0 to the boolean result
;; (via "seq"), operand 1 to the old memory value, and conditionally
;; stores operand 4 into operand 2 when it equalled operand 3.
(define_insn "cris_atomic_compare_and_swap<mode>_1"
[(set (match_operand:SI 0 "register_operand" "=&r")
(unspec_volatile:SI
[(match_operand:BWD 2 "memory_operand" "+Q")
(match_operand:BWD 3 "general_operand" "g")]
CRIS_UNSPEC_ATOMIC_SWAP_BOOL))
(set (match_operand:BWD 1 "register_operand" "=&r") (match_dup 2))
(set (match_dup 2)
(unspec_volatile:BWD
[(match_dup 2)
(match_dup 3)
(match_operand:BWD 4 "register_operand" "r")]
CRIS_UNSPEC_ATOMIC_SWAP_MEM))]
""
{
if (TARGET_V32)
/* LL/SC flavor: retry the whole sequence while the conditional
store (the "ax"-marked move) did not complete integrally.  */
return
"clearf p\n"
".Lsync.repeat.%=:\;"
"move<m> %2,%1\;"
"cmp<m> %3,%1\;"
"bne .Lsync.after.%=\;"
"seq %0\;"
"ax\;"
"move<m> %4,%2\;"
"bcs .Lsync.repeat.%=\;"
"clearf p\n"
".Lsync.after.%=:";
else if (cris_cpu_version == 10)
/* Same structure as v32, with the v10 flag/clearf variant.  */
return
"clearf\n"
".Lsync.repeat.%=:\;"
"move<m> %2,%1\;"
"cmp<m> %3,%1\;"
"bne .Lsync.after.%=\;"
"seq %0\;"
"ax\;"
"move<m> %4,%2\;"
"bwf .Lsync.repeat.%=\;"
"clearf\n"
".Lsync.after.%=:";
else
{
/* This one is for CRIS versions without load-locked-store-conditional
machinery; assume single-core-non-shared-memory without user
mode/supervisor mode distinction, and just disable interrupts
while performing the operation.
Rather than making this pattern more complex by freeing another
register or stack position to save condition codes (the value
of the interrupt-enabled bit), we check whether interrupts were
enabled before we disabled them and branch to a version
with/without afterwards re-enabling them. */
rtx ops[4];
/* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT. */
memcpy (ops, operands, sizeof(ops));
ops[3] = GEN_INT (CRIS_CCR_INTERRUPT_BIT);
output_asm_insn ("move $ccr,%0\;"
"di\;"
"move<m> %2,%1\;"
"btstq %3,%0",
ops);
/* NB: the returned template is expanded against the unmodified
OPERANDS array, not the local OPS copy, so %3 below is again the
expected value.  */
return
"bmi .Lsync.irqon.%=\;"
"nop\;"
"cmp<m> %3,%1\;"
"bne .Lsync.after.%=\;"
"seq %0\;"
"ba .Lsync.after.%=\;"
"move<m> %4,%2\n"
".Lsync.irqon.%=:\;"
"cmp<m> %3,%1\;"
"bne .Lsync.after.%=\;"
"seq %0\;"
"move<m> %4,%2\;"
"ei\n"
".Lsync.after.%=:";
}
})