[ARC] Reimplement ZOL support.

2017-05-22  Claudiu Zissulescu <claziss@synopsys.com>

	* config/arc/arc-c.c (__ARC_LPC_WIDTH__): Add builtin define.
	* config/arc/arc.c (ARC_MAX_LOOP_LENGTH): Define.
	(arc_conditional_register_usage): Remove ARC600 lp_count
	exception.
	(arc_file_start): Emit Tag_ARC_CPU_variation.
	(arc_can_use_doloop_p): New conditions to use ZOLs.
	(hwloop_fail): New function.
	(hwloop_optimize): Likewise.
	(hwloop_pattern_reg): Likewise.
	(arc_doloop_hooks): New struct, to be used with reorg_loops.
	(arc_reorg_loops): New function, calls reorg_loops.
	(arc_reorg): Call arc_reorg_loops.  Remove old ZOL handling.
	(arc600_corereg_hazard): Remove ZOL checking, case handled by
	hwloop_optimize.
	(arc_loop_hazard): Remove function, functionality moved into
	hwloop_optimize.
	(arc_hazard): Remove arc_loop_hazard call.
	(arc_adjust_insn_length): Remove ZOL handling, functionality moved
	into hwloop_optimize.
	(arc_label_align): Remove ZOL handling.
	* config/arc/arc.h (LOOP_ALIGN): Changed to 0.
	* config/arc/arc.md (doloop_begin): Remove pattern.
	(doloop_begin_i): Likewise.
	(doloop_end_i): Likewise.
	(doloop_fallback): Likewise.
	(doloop_fallback_m): Likewise.
	(doloop_end): Reimplement expand.
	(arc_lp): New pattern for LP instruction.
	(loop_end): New pattern.
	(loop_fail): Likewise.
	(decrement_and_branch_until_zero): Likewise.
	* config/arc/arc.opt (mlpc-width): New option.
	* doc/invoke.texi (mlpc-width): Document option.

testsuite/
2017-05-22  Claudiu Zissulescu <claziss@synopsys.com>

 	    * gcc.target/arc/loop-1.c: Deleted.

From-SVN: r251589
This commit is contained in:
Claudiu Zissulescu 2017-09-01 13:43:51 +02:00 committed by Claudiu Zissulescu
parent 782bdf2189
commit a2de90a45a
10 changed files with 584 additions and 690 deletions

View file

@ -1,3 +1,39 @@
2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-c.c (__ARC_LPC_WIDTH__): Add builtin define.
* config/arc/arc.c (ARC_MAX_LOOP_LENGTH): Define.
(arc_conditional_register_usage): Remove ARC600 lp_count
exception.
(arc_file_start): Emit Tag_ARC_CPU_variation.
(arc_can_use_doloop_p): New conditions to use ZOLs.
(hwloop_fail): New function.
(hwloop_optimize): Likewise.
(hwloop_pattern_reg): Likewise.
(arc_doloop_hooks): New struct, to be used with reorg_loops.
(arc_reorg_loops): New function, calls reorg_loops.
(arc_reorg): Call arc_reorg_loops. Remove old ZOL handling.
(arc600_corereg_hazard): Remove ZOL checking, case handled by
hwloop_optimize.
(arc_loop_hazard): Remove function, functionality moved into
hwloop_optimize.
(arc_hazard): Remove arc_loop_hazard call.
(arc_adjust_insn_length): Remove ZOL handling, functionality moved
into hwloop_optimize.
(arc_label_align): Remove ZOL handling.
* config/arc/arc.h (LOOP_ALIGN): Changed to 0.
* config/arc/arc.md (doloop_begin): Remove pattern.
(doloop_begin_i): Likewise.
(doloop_end_i): Likewise.
(doloop_fallback): Likewise.
(doloop_fallback_m): Likewise.
(doloop_end): Reimplement expand.
(arc_lp): New pattern for LP instruction.
(loop_end): New pattern.
(loop_fail): Likewise.
(decrement_and_branch_until_zero): Likewise.
* config/arc/arc.opt (mlpc-width): New option.
* doc/invoke.texi (mlpc-width): Document option.
2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (arc_ifcvt): Remove use of merge_blocks call.

View file

@ -62,6 +62,8 @@ arc_cpu_cpp_builtins (cpp_reader * pfile)
builtin_define_with_int_value ("__ARC_TLS_REGNO__",
arc_tp_regno);
builtin_define_with_int_value ("__ARC_LPC_WIDTH__", arc_lpcwidth);
builtin_define (TARGET_BIG_ENDIAN
? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__");
if (TARGET_BIG_ENDIAN)

View file

@ -65,11 +65,15 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "alias.h"
#include "opts.h"
#include "hw-doloop.h"
/* Which cpu we're compiling for (ARC600, ARC601, ARC700). */
static char arc_cpu_name[10] = "";
static const char *arc_cpu_string = arc_cpu_name;
/* Maximum size of a loop. */
#define ARC_MAX_LOOP_LENGTH 4095
/* ??? Loads can handle any constant, stores can only handle small ones. */
/* OTOH, LIMMs cost extra, so their usefulness is limited. */
#define RTX_OK_FOR_OFFSET_P(MODE, X) \
@ -1705,18 +1709,7 @@ arc_conditional_register_usage (void)
i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
reg_alloc_order [i] = i;
}
/* For ARC600, lp_count may not be read in an instruction
following immediately after another one setting it to a new value.
There was some discussion on how to enforce scheduling constraints for
processors with missing interlocks on the gcc mailing list:
http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
However, we can't actually use this approach, because for ARC the
delay slot scheduling pass is active, which runs after
machine_dependent_reorg. */
if (TARGET_ARC600)
CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
else if (!TARGET_LP_WR_INTERLOCK)
fixed_regs[LP_COUNT] = 1;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (!call_used_regs[regno])
CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
@ -6900,28 +6893,33 @@ arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
/* Implement TARGET_CAN_USE_DOLOOP_P. */
static bool
arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
arc_can_use_doloop_p (const widest_int &,
const widest_int &iterations_max,
unsigned int loop_depth, bool entered_at_top)
{
if (loop_depth > 1)
/* Considering limitations in the hardware, only use doloop
for innermost loops which must be entered from the top. */
if (loop_depth > 1 || !entered_at_top)
return false;
/* Setting up the loop with two sr instructions costs 6 cycles. */
if (TARGET_ARC700
&& !entered_at_top
&& wi::gtu_p (iterations, 0)
&& wi::leu_p (iterations, flag_pic ? 6 : 3))
/* Check for lp_count width boundary. */
if (arc_lpcwidth != 32
&& (wi::gtu_p (iterations_max, ((1 << arc_lpcwidth) - 1))
|| wi::eq_p (iterations_max, 0)))
return false;
return true;
}
/* NULL if INSN insn is valid within a low-overhead loop.
Otherwise return why doloop cannot be applied. */
/* NULL if INSN insn is valid within a low-overhead loop. Otherwise
return why doloop cannot be applied. */
static const char *
arc_invalid_within_doloop (const rtx_insn *insn)
{
if (CALL_P (insn))
return "Function call in the loop.";
/* FIXME! add here all the ZOL exceptions. */
return NULL;
}
@ -7020,6 +7018,359 @@ workaround_arc_anomaly (void)
}
}
/* A callback for the hw-doloop pass.  Called when a loop we have discovered
   turns out not to be optimizable; we have to split the loop_end pattern into
   a subtract and a test.  */
static void
hwloop_fail (hwloop_info loop)
{
  rtx test;
  rtx insn = loop->loop_end;

  if (TARGET_V2
      && (loop->length && (loop->length <= ARC_MAX_LOOP_LENGTH))
      && REG_P (loop->iter_reg))
    {
      /* TARGET_V2 has dbnz instructions.  */
      test = gen_dbnz (loop->iter_reg, loop->start_label);
      insn = emit_jump_insn_before (test, loop->loop_end);
    }
  else if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg) == LP_COUNT))
    {
      /* We have the lp_count as loop iterator, try to use it.
	 loop_fail decrements lp_count and sets the Z flag; branch back
	 to the loop start while the counter is non-zero.  */
      emit_insn_before (gen_loop_fail (), loop->loop_end);
      test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG),
			 const0_rtx);
      test = gen_rtx_IF_THEN_ELSE (VOIDmode, test,
				   gen_rtx_LABEL_REF (Pmode, loop->start_label),
				   pc_rtx);
      insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test),
				    loop->loop_end);
    }
  else
    {
      /* Generic fallback: decrement the iterator in place and emit a
	 compare-and-branch back to the loop start.  */
      emit_insn_before (gen_addsi3 (loop->iter_reg,
				    loop->iter_reg,
				    constm1_rtx),
			loop->loop_end);
      test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
      insn = emit_jump_insn_before (gen_cbranchsi4 (test,
						    loop->iter_reg,
						    const0_rtx,
						    loop->start_label),
				    loop->loop_end);
    }
  /* The new jump replaces the doloop_end insn; keep the label
     bookkeeping consistent before deleting the old pattern.  */
  JUMP_LABEL (insn) = loop->start_label;
  LABEL_NUSES (loop->start_label)++;
  delete_insn (loop->loop_end);
}
/* Optimize LOOP.  Called by the hw-doloop pass for each candidate
   loop.  Check whether LOOP satisfies the ARC zero-overhead-loop (ZOL)
   restrictions; if it does, emit the arc_lp instruction in front of
   the loop and place the loop end label after its last insn.  Return
   true on success; returning false makes the pass fall back to
   hwloop_fail.  */
static bool
hwloop_optimize (hwloop_info loop)
{
  int i;
  edge entry_edge;
  basic_block entry_bb, bb;
  rtx iter_reg, end_label;
  rtx_insn *insn, *seq, *entry_after, *last_insn;
  unsigned int length;
  bool need_fix = false;
  rtx lp_reg = gen_rtx_REG (SImode, LP_COUNT);

  /* Structural checks: only an innermost loop with a single entry
     falling into its head, and without calls or asm, can become a
     ZOL.  */
  if (loop->depth > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d is not innermost\n",
		 loop->loop_no);
      return false;
    }

  if (!loop->incoming_dest)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d has more than one entry\n",
		 loop->loop_no);
      return false;
    }

  if (loop->incoming_dest != loop->head)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d is not entered from head\n",
		 loop->loop_no);
      return false;
    }

  if (loop->has_call || loop->has_asm)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d has invalid insn\n",
		 loop->loop_no);
      return false;
    }

  /* Scan all the blocks to make sure they don't use iter_reg.  */
  if (loop->iter_reg_used || loop->iter_reg_used_outside)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d uses iterator\n",
		 loop->loop_no);
      return false;
    }

  /* Check if start_label appears before doloop_end.  Accumulate the
     loop length on the way; it is needed for the size checks below.  */
  length = 0;
  for (insn = loop->start_label;
       insn && insn != loop->loop_end;
       insn = NEXT_INSN (insn))
    length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0;

  if (!insn)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
		 loop->loop_no);
      return false;
    }

  loop->length = length;
  if (loop->length > ARC_MAX_LOOP_LENGTH)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
      return false;
    }

  /* Check if we use a register or not.  */
  if (!REG_P (loop->iter_reg))
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d iterator is MEM\n",
		 loop->loop_no);
      return false;
    }

  /* Check if loop register is lpcount.  */
  if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
		 " iterator\n",
		 loop->loop_no);
      /* This loop doesn't use the lp_count, check though if we can
	 fix it.  We can only do so if LP_COUNT is neither set inside
	 the loop nor live on entry.  */
      if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT)
	  /* In very unique cases we may have LP_COUNT alive.  */
	  || (loop->incoming_src
	      && REGNO_REG_SET_P (df_get_live_out (loop->incoming_src),
				  LP_COUNT)))
	return false;
      else
	need_fix = true;
    }

  /* Check for control like instruction as the last instruction of a
     ZOL.  Walk backwards from loop_end, crossing into fallthrough
     predecessor blocks until a non-debug insn is found.  */
  bb = loop->tail;
  last_insn = PREV_INSN (loop->loop_end);

  while (1)
    {
      for (; last_insn != BB_HEAD (bb);
	   last_insn = PREV_INSN (last_insn))
	if (NONDEBUG_INSN_P (last_insn))
	  break;

      if (last_insn != BB_HEAD (bb))
	break;

      if (single_pred_p (bb)
	  && single_pred_edge (bb)->flags & EDGE_FALLTHRU
	  && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  bb = single_pred (bb);
	  last_insn = BB_END (bb);
	  continue;
	}
      else
	{
	  last_insn = NULL;
	  break;
	}
    }

  if (!last_insn)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d has no last instruction\n",
		 loop->loop_no);
      return false;
    }

  /* On ARC600-family and HS cores a control insn (or one carrying a
     delay slot SEQUENCE) may not end the ZOL body; pad with a nop,
     accounting for its 2 bytes in the length limit.  */
  if ((TARGET_ARC600_FAMILY || TARGET_HS)
      && INSN_P (last_insn)
      && (JUMP_P (last_insn) || CALL_P (last_insn)
	  || GET_CODE (PATTERN (last_insn)) == SEQUENCE
	  || get_attr_type (last_insn) == TYPE_BRCC
	  || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT))
    {
      if (loop->length + 2 > ARC_MAX_LOOP_LENGTH)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
	  return false;
	}
      if (dump_file)
	fprintf (dump_file, ";; loop %d has a control like last insn;"
		 "add a nop\n",
		 loop->loop_no);

      last_insn = emit_insn_after (gen_nopv (), last_insn);
    }

  if (LABEL_P (last_insn))
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d has a label as last insn;"
		 "add a nop\n",
		 loop->loop_no);
      last_insn = emit_insn_after (gen_nopv (), last_insn);
    }
  loop->last_insn = last_insn;

  /* Get the loop iteration register.  */
  iter_reg = loop->iter_reg;

  gcc_assert (REG_P (iter_reg));

  entry_edge = NULL;

  /* The LP setup sequence is placed on a fallthru edge into the
     loop.  */
  FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
    if (entry_edge->flags & EDGE_FALLTHRU)
      break;

  if (entry_edge == NULL)
    {
      if (dump_file)
	fprintf (dump_file, ";; loop %d has no fallthru edge jumping"
		 "into the loop\n",
		 loop->loop_no);
      return false;
    }
  /* The loop is good.  */
  end_label = gen_label_rtx ();
  loop->end_label = end_label;

  /* Place the zero_cost_loop_start instruction before the loop.  */
  entry_bb = entry_edge->src;

  start_sequence ();

  if (need_fix)
    {
      /* The loop uses a R-register, but the lp_count is free, thus
	 use lp_count.  */
      emit_insn (gen_movsi (lp_reg, iter_reg));
      SET_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT);
      iter_reg = lp_reg;
      if (dump_file)
	{
	  fprintf (dump_file, ";; fix loop %d to use lp_count\n",
		   loop->loop_no);
	}
    }

  insn = emit_insn (gen_arc_lp (iter_reg,
				loop->start_label,
				loop->end_label));

  seq = get_insns ();
  end_sequence ();

  entry_after = BB_END (entry_bb);
  if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1
      || !entry_after)
    {
      /* No safe place at the end of ENTRY_BB: create a fresh block
	 holding the LP setup and reroute every incoming edge to it.  */
      basic_block new_bb;
      edge e;
      edge_iterator ei;

      emit_insn_before (seq, BB_HEAD (loop->head));
      seq = emit_label_before (gen_label_rtx (), seq);
      new_bb = create_basic_block (seq, insn, entry_bb);
      FOR_EACH_EDGE (e, ei, loop->incoming)
	{
	  if (!(e->flags & EDGE_FALLTHRU))
	    redirect_edge_and_branch_force (e, new_bb);
	  else
	    redirect_edge_succ (e, new_bb);
	}

      make_edge (new_bb, loop->head, 0);
    }
  else
    {
#if 0
      while (DEBUG_INSN_P (entry_after)
	     || (NOTE_P (entry_after)
		 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK
		 /* Make sure we don't split a call and its corresponding
		    CALL_ARG_LOCATION note.  */
		 && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
	entry_after = NEXT_INSN (entry_after);
#endif
      entry_after = next_nonnote_insn_bb (entry_after);

      gcc_assert (entry_after);
      emit_insn_before (seq, entry_after);
    }

  delete_insn (loop->loop_end);
  /* Insert the loop end label before the last instruction of the
     loop.  */
  emit_label_after (end_label, loop->last_insn);

  return true;
}
/* A callback for the hw-doloop pass.  This function examines INSN; if
   it is a loop_end pattern we recognize, return the reg rtx for the
   loop counter.  Otherwise, return NULL_RTX.  */
static rtx
hwloop_pattern_reg (rtx_insn *insn)
{
  if (!JUMP_P (insn))
    return NULL_RTX;
  if (recog_memoized (insn) != CODE_FOR_loop_end)
    return NULL_RTX;

  /* The counter is the destination of the second element of the
     loop_end parallel.  */
  rtx cnt_reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
  return REG_P (cnt_reg) ? cnt_reg : NULL_RTX;
}
/* Hook table consumed by the generic hw-doloop pass (hw-doloop.h):
   loop-counter recognition, per-loop conversion, and the fallback used
   when a loop cannot be converted.  */
static struct hw_doloop_hooks arc_doloop_hooks =
{
  hwloop_pattern_reg,
  hwloop_optimize,
  hwloop_fail
};
/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
   and tries to rewrite the RTL of these loops so that proper ARC
   hardware loops are generated.  (Comment previously said "Blackfin";
   the scheme is inherited from that port via hw-doloop.)  */
static void
arc_reorg_loops (void)
{
  reorg_loops (true, &arc_doloop_hooks);
}
static int arc_reorg_in_progress = 0;
/* ARC's machine specific reorg function. */
@ -7033,204 +7384,17 @@ arc_reorg (void)
long offset;
int changed;
workaround_arc_anomaly ();
cfun->machine->arc_reorg_started = 1;
arc_reorg_in_progress = 1;
/* Link up loop ends with their loop start. */
{
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (GET_CODE (insn) == JUMP_INSN
&& recog_memoized (insn) == CODE_FOR_doloop_end_i)
{
rtx_insn *top_label
= as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
rtx_insn *lp_simple = NULL;
rtx_insn *next = NULL;
rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
int seen_label = 0;
compute_bb_for_insn ();
for (lp = prev;
(lp && NONJUMP_INSN_P (lp)
&& recog_memoized (lp) != CODE_FOR_doloop_begin_i);
lp = prev_nonnote_insn (lp))
;
if (!lp || !NONJUMP_INSN_P (lp)
|| dead_or_set_regno_p (lp, LP_COUNT))
{
HOST_WIDE_INT loop_end_id
= INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
df_analyze ();
for (prev = next = insn, lp = NULL ; prev || next;)
{
if (prev)
{
if (NONJUMP_INSN_P (prev)
&& recog_memoized (prev) == CODE_FOR_doloop_begin_i
&& (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
== loop_end_id))
{
lp = prev;
break;
}
else if (LABEL_P (prev))
seen_label = 1;
prev = prev_nonnote_insn (prev);
}
if (next)
{
if (NONJUMP_INSN_P (next)
&& recog_memoized (next) == CODE_FOR_doloop_begin_i
&& (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
== loop_end_id))
{
lp = next;
break;
}
next = next_nonnote_insn (next);
}
}
prev = NULL;
}
else
lp_simple = lp;
if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
{
rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0);
if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
/* The loop end insn has been duplicated. That can happen
when there is a conditional block at the very end of
the loop. */
goto failure;
/* If Register allocation failed to allocate to the right
register, There is no point into teaching reload to
fix this up with reloads, as that would cost more
than using an ordinary core register with the
doloop_fallback pattern. */
if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
/* Likewise, if the loop setup is evidently inside the loop,
we lose. */
|| (!lp_simple && lp != next && !seen_label))
{
remove_insn (lp);
goto failure;
}
/* It is common that the optimizers copy the loop count from
another register, and doloop_begin_i is stuck with the
source of the move. Making doloop_begin_i only accept "l"
is nonsensical, as this then makes reload evict the pseudo
used for the loop end. The underlying cause is that the
optimizers don't understand that the register allocation for
doloop_begin_i should be treated as part of the loop.
Try to work around this problem by verifying the previous
move exists. */
if (true_regnum (begin_cnt) != LP_COUNT)
{
rtx_insn *mov;
rtx set, note;
/* Doloop optimization. */
arc_reorg_loops ();
for (mov = prev_nonnote_insn (lp); mov;
mov = prev_nonnote_insn (mov))
{
if (!NONJUMP_INSN_P (mov))
mov = 0;
else if ((set = single_set (mov))
&& rtx_equal_p (SET_SRC (set), begin_cnt)
&& rtx_equal_p (SET_DEST (set), op0))
break;
}
if (mov)
{
XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0;
note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
if (note)
remove_note (lp, note);
}
else
{
remove_insn (lp);
goto failure;
}
}
XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
if (next == lp)
XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
else if (!lp_simple)
XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
else if (prev != lp)
{
remove_insn (lp);
add_insn_after (lp, prev, NULL);
}
if (!lp_simple)
{
XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
= gen_rtx_LABEL_REF (Pmode, top_label);
add_reg_note (lp, REG_LABEL_OPERAND, top_label);
LABEL_NUSES (top_label)++;
}
/* We can avoid tedious loop start / end setting for empty loops
be merely setting the loop count to its final value. */
if (next_active_insn (top_label) == insn)
{
rtx lc_set
= gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
const0_rtx);
rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
delete_insn (lp);
delete_insn (insn);
insn = lc_set_insn;
}
/* If the loop is non-empty with zero length, we can't make it
a zero-overhead loop. That can happen for empty asms. */
else
{
rtx_insn *scan;
for (scan = top_label;
(scan && scan != insn
&& (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
scan = NEXT_INSN (scan));
if (scan == insn)
{
remove_insn (lp);
goto failure;
}
}
}
else
{
/* Sometimes the loop optimizer makes a complete hash of the
loop. If it were only that the loop is not entered at the
top, we could fix this up by setting LP_START with SR .
However, if we can't find the loop begin were it should be,
chances are that it does not even dominate the loop, but is
inside the loop instead. Using SR there would kill
performance.
We use the doloop_fallback pattern here, which executes
in two cycles on the ARC700 when predicted correctly. */
failure:
if (!REG_P (op0))
{
rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
emit_insn_before (gen_move_insn (op3, op0), insn);
PATTERN (insn)
= gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
}
else
XVEC (PATTERN (insn), 0)
= gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
XVECEXP (PATTERN (insn), 0, 1));
INSN_CODE (insn) = -1;
}
}
}
workaround_arc_anomaly ();
/* FIXME: should anticipate ccfsm action, generate special patterns for
to-be-deleted branches that have no delay slot and have at least the
@ -7774,11 +7938,11 @@ arc_register_move_cost (machine_mode,
return 6;
}
/* The ARC700 stalls for 3 cycles when *reading* from lp_count. */
if (TARGET_ARC700
&& (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
|| from_class == WRITABLE_CORE_REGS))
return 8;
/* Using lp_count as scratch reg is a VERY bad idea. */
if (from_class == LPCOUNT_REG)
return 1000;
if (to_class == LPCOUNT_REG)
return 6;
/* Force an attempt to 'mov Dy,Dx' to spill. */
if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP
@ -8220,14 +8384,6 @@ arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
{
if (!TARGET_ARC600)
return 0;
/* If SUCC is a doloop_end_i with a preceding label, we must output a nop
in front of SUCC anyway, so there will be separation between PRED and
SUCC. */
if (recog_memoized (succ) == CODE_FOR_doloop_end_i
&& LABEL_P (prev_nonnote_insn (succ)))
return 0;
if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
return 0;
if (GET_CODE (PATTERN (pred)) == SEQUENCE)
pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
if (GET_CODE (PATTERN (succ)) == SEQUENCE)
@ -8301,76 +8457,6 @@ arc_asm_insn_p (rtx x)
return 0;
}
/* We might have a CALL to a non-returning function before a loop end.
??? Although the manual says that's OK (the target is outside the
loop, and the loop counter unused there), the assembler barfs on
this for ARC600, so we must insert a nop before such a call too.
For ARC700, and ARCv2 is not allowed to have the last ZOL
instruction a jump to a location where lp_count is modified. */
static bool
arc_loop_hazard (rtx_insn *pred, rtx_insn *succ)
{
rtx_insn *jump = NULL;
rtx label_rtx = NULL_RTX;
rtx_insn *label = NULL;
basic_block succ_bb;
if (recog_memoized (succ) != CODE_FOR_doloop_end_i)
return false;
/* Phase 1: ARC600 and ARCv2HS doesn't allow any control instruction
(i.e., jump/call) as the last instruction of a ZOL. */
if (TARGET_ARC600 || TARGET_HS)
if (JUMP_P (pred) || CALL_P (pred)
|| arc_asm_insn_p (PATTERN (pred))
|| GET_CODE (PATTERN (pred)) == SEQUENCE)
return true;
/* Phase 2: Any architecture, it is not allowed to have the last ZOL
instruction a jump to a location where lp_count is modified. */
/* Phase 2a: Dig for the jump instruction. */
if (JUMP_P (pred))
jump = pred;
else if (GET_CODE (PATTERN (pred)) == SEQUENCE
&& JUMP_P (XVECEXP (PATTERN (pred), 0, 0)))
jump = as_a <rtx_insn *> (XVECEXP (PATTERN (pred), 0, 0));
else
return false;
/* Phase 2b: Make sure it is not a millicode jump. */
if ((GET_CODE (PATTERN (jump)) == PARALLEL)
&& (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx))
return false;
label_rtx = JUMP_LABEL (jump);
if (!label_rtx)
return false;
/* Phase 2c: Make sure it is not a return. */
if (ANY_RETURN_P (label_rtx))
return false;
/* Phase 2d: Go to the target of the jump and check for aliveness of
LP_COUNT register. */
label = safe_as_a <rtx_insn *> (label_rtx);
succ_bb = BLOCK_FOR_INSN (label);
if (!succ_bb)
{
gcc_assert (NEXT_INSN (label));
if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label)))
succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label));
else
succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label));
}
if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT))
return true;
return false;
}
/* For ARC600:
A write to a core reg greater or equal to 32 must not be immediately
followed by a use. Anticipate the length requirement to insert a nop
@ -8382,9 +8468,6 @@ arc_hazard (rtx_insn *pred, rtx_insn *succ)
if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
return 0;
if (arc_loop_hazard (pred, succ))
return 4;
if (TARGET_ARC600)
return arc600_corereg_hazard (pred, succ);
@ -8402,24 +8485,6 @@ arc_adjust_insn_length (rtx_insn *insn, int len, bool)
if (GET_CODE (PATTERN (insn)) == SEQUENCE)
return len;
/* It is impossible to jump to the very end of a Zero-Overhead Loop, as
the ZOL mechanism only triggers when advancing to the end address,
so if there's a label at the end of a ZOL, we need to insert a nop.
The ARC600 ZOL also has extra restrictions on jumps at the end of a
loop. */
if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
{
rtx_insn *prev = prev_nonnote_insn (insn);
return ((LABEL_P (prev)
|| (TARGET_ARC600
&& (JUMP_P (prev)
|| CALL_P (prev) /* Could be a noreturn call. */
|| (NONJUMP_INSN_P (prev)
&& GET_CODE (PATTERN (prev)) == SEQUENCE))))
? len + 4 : len);
}
/* Check for return with but one preceding insn since function
start / call. */
if (TARGET_PAD_RETURN
@ -9755,27 +9820,9 @@ arc_scheduling_not_expected (void)
return cfun->machine->arc_reorg_started;
}
/* Oddly enough, sometimes we get a zero overhead loop that branch
shortening doesn't think is a loop - observed with compile/pr24883.c
-O3 -fomit-frame-pointer -funroll-loops. Make sure to include the
alignment visible for branch shortening (we actually align the loop
insn before it, but that is equivalent since the loop insn is 4 byte
long.) */
int
arc_label_align (rtx_insn *label)
{
int loop_align = LOOP_ALIGN (LABEL);
if (loop_align > align_labels_log)
{
rtx_insn *prev = prev_nonnote_insn (label);
if (prev && NONJUMP_INSN_P (prev)
&& GET_CODE (PATTERN (prev)) == PARALLEL
&& recog_memoized (prev) == CODE_FOR_doloop_begin_i)
return loop_align;
}
/* Code has a minimum p2 alignment of 1, which we must restore after an
ADDR_DIFF_VEC. */
if (align_labels_log < 1)

View file

@ -581,15 +581,15 @@ enum reg_class
{0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \
{0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \
{0x1c001fff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12 */ \
{0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \
{0x9fffffff, 0x80000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \
{0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'W', r0-r31 */ \
/* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. As these \
registers are fixed, it does not affect the literal meaning of the \
constraints, but it makes it a superset of GENERAL_REGS, thus \
enabling some operations that would otherwise not be possible. */ \
{0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \
{0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \
{0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \
{0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \
{0xffffffff, 0x9fffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \
{0xffffffff, 0x9fffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \
{0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rcd', r0-r3 */ \
{0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rsd', r0-r1 */ \
{0x9fffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'h', r0-28, r30 */ \
@ -1351,7 +1351,7 @@ do { \
of a loop. */
/* On the ARC, align loops to 4 byte boundaries unless doing all-out size
optimization. */
#define LOOP_ALIGN JUMP_ALIGN
#define LOOP_ALIGN(X) 0
#define LABEL_ALIGN(LABEL) (arc_label_align (LABEL))

View file

@ -554,6 +554,11 @@
(eq_attr "annul_ret_delay_insn" "yes")
(eq_attr "cond_ret_delay_insn" "yes")])
(define_delay (eq_attr "type" "loop_end")
[(eq_attr "in_delay_slot" "true")
(eq_attr "in_delay_slot" "true")
(nil)])
;; For ARC600, unexposing the delay slot incurs a penalty also in the
;; non-taken case, so the only meaningful way to have an annull-true
;; filled delay slot is to conditionalize the delay slot insn.
@ -618,8 +623,8 @@
; The iscompact attribute allows the epilogue expander to know for which
; insns it should lengthen the return insn.
(define_insn "*movqi_insn"
[(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w,???w,h, w,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc")
(match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,?Rac,i,?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
[(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l,???w,h,w*l,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc")
(match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,?Rac,i, ?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
"register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode)"
"@
@ -655,8 +660,8 @@
"if (prepare_move_operands (operands, HImode)) DONE;")
(define_insn "*movhi_insn"
[(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w,???w,Rcq#q,h, w,Rcq, S, r,r, Ucm,m,???m, m,VUsc")
(match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,?Rac, i,i,?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
[(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l,???w,Rcq#q,h,w*l,Rcq, S, r,r, Ucm,m,???m, m,VUsc")
(match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,?Rac, i,i, ?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
"register_operand (operands[0], HImode)
|| register_operand (operands[1], HImode)
|| (CONSTANT_P (operands[1])
@ -706,9 +711,9 @@
; the iscompact attribute allows the epilogue expander to know for which
; insns it should lengthen the return insn.
; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc .
(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
[(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w, w, w, w, w,???w, ?w, w,Rcq#q, h, w,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc")
(match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,Crr,Clo,Chi,Cbi,?Rac,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))]
(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
[(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l, w, w, w, w, ???w, ?w, w,Rcq#q, h, w*l,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc")
(match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,Crr,Clo,Chi,Cbi,?Rac*l,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))]
"register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode)
|| (CONSTANT_P (operands[1])
@ -5106,317 +5111,123 @@
xtr, const0_rtx);
})
; operand 0 is the loop count pseudo register
; operand 1 is the loop end pattern
(define_expand "doloop_begin"
[(use (match_operand 0 "register_operand" ""))
(use (match_operand 1 "" ""))]
""
{
/* Using the INSN_UID of the loop end pattern to identify it causes
trouble with -fcompare-debug, so allocate a debug-independent
id instead. We use negative numbers so that we can use the same
slot in doloop_end_i where we later store a CODE_LABEL_NUMBER, and
still be able to tell what kind of number this is. */
static HOST_WIDE_INT loop_end_id = 0;
rtx id = GEN_INT (--loop_end_id);
XEXP (XVECEXP (PATTERN (operands[1]), 0, 4), 0) = id;
emit_insn (gen_doloop_begin_i (operands[0], const0_rtx, id,
const0_rtx, const0_rtx));
DONE;
})
; ??? can't describe the insn properly as then the optimizers try to
; hoist the SETs.
;(define_insn "doloop_begin_i"
; [(set (reg:SI LP_START) (pc))
; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_ARC_LP))
; (use (match_operand 0 "const_int_operand" "n"))]
; ""
; "lp .L__GCC__LP%0"
;)
; The operands of doloop_end_i are also read / written by arc_reorg with
; XVECEXP (PATTERN (lp, 0, N), so if you want to change the pattern, you
; might have to adjust arc_reorg.
; operands 0 / 2 are supplied by the expander, 1, 3 and 4 are filled in
; by arc_reorg. arc_reorg might also alter operand 0.
;
; N in XVECEXP PATTERN (lp, 0 N)
; V rtl purpose
; 0 unspec UNSPEC_ARC_LP identify pattern
; 1 clobber LP_START show LP_START is set
; 2 clobber LP_END show LP_END is set
; 3 use operand0 loop count pseudo register
; 4 use operand1 before arc_reorg: -id
; after : CODE_LABEL_NUMBER of loop top label
; 5 use operand2 INSN_UID of loop end insn
; 6 use operand3 loop setup not at start (1 above, 2 below)
; 7 use operand4 LABEL_REF of top label, if not
; immediately following
; If operand1 is still zero after arc_reorg, this is an orphaned loop
; instruction that was not at the start of the loop.
; There is no point is reloading this insn - then lp_count would still not
; be available for the loop end.
(define_insn "doloop_begin_i"
[(unspec:SI [(pc)] UNSPEC_ARC_LP)
(clobber (reg:SI LP_START))
(clobber (reg:SI LP_END))
(use (match_operand:SI 0 "register_operand" "l,l,????*X"))
(use (match_operand 1 "const_int_operand" "n,n,C_0"))
(use (match_operand 2 "const_int_operand" "n,n,X"))
(use (match_operand 3 "const_int_operand" "C_0,n,X"))
(use (match_operand 4 "const_int_operand" "C_0,X,X"))]
""
{
rtx_insn *scan;
int len, size = 0;
int n_insns = 0;
rtx loop_start = operands[4];
if (CONST_INT_P (loop_start))
loop_start = NULL_RTX;
/* Size implications of the alignment will be taken care of by the
alignment inserted at the loop start. */
if (LOOP_ALIGN (0) && INTVAL (operands[1]))
{
asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0));
arc_clear_unalign ();
}
if (!INTVAL (operands[1]))
return "; LITTLE LOST LOOP";
if (loop_start && flag_pic)
{
/* ??? Can do better for when a scratch register
is known. But that would require extra testing. */
return "push_s r0\;add r0,pcl,%4@pcl\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1@pcl\;sr r0,[3]; LP_END\;pop_s r0";
}
/* Check if the loop end is in range to be set by the lp instruction. */
size = INTVAL (operands[3]) < 2 ? 0 : 2048;
for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan))
{
if (!INSN_P (scan))
continue;
if (recog_memoized (scan) == CODE_FOR_doloop_end_i
&& (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0)
== XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)))
break;
len = get_attr_length (scan);
size += len;
}
/* Try to verify that there are at least three instruction fetches
between the loop setup and the first encounter of the loop end. */
for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan))
{
if (!INSN_P (scan))
continue;
if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (scan)))
scan = seq->insn (0);
if (JUMP_P (scan))
{
if (recog_memoized (scan) != CODE_FOR_doloop_end_i)
{
n_insns += 2;
if (simplejump_p (scan))
{
scan = as_a <rtx_insn *> (XEXP (SET_SRC (PATTERN (scan)), 0));
continue;
}
rtx lab = JUMP_LABEL (scan);
if (!lab)
break;
rtx_insn *next_scan
= next_active_insn (NEXT_INSN (PREV_INSN (scan)));
if (next_scan
&& recog_memoized (next_scan) != CODE_FOR_doloop_begin_i)
break;
/* JUMP_LABEL might be simple_return instead if an insn. */
if (!INSN_P (lab))
{
n_insns++;
break;
}
rtx_insn *next_lab = next_active_insn (as_a<rtx_insn *> (lab));
if (next_lab
&& recog_memoized (next_lab) != CODE_FOR_doloop_begin_i)
break;
n_insns++;
}
break;
}
len = get_attr_length (scan);
/* Size estimation of asms assumes that each line which is nonempty
codes an insn, and that each has a long immediate. For minimum insn
count, assume merely that a nonempty asm has at least one insn. */
if (GET_CODE (PATTERN (scan)) == ASM_INPUT
|| asm_noperands (PATTERN (scan)) >= 0)
n_insns += (len != 0);
else
n_insns += (len > 4 ? 2 : (len ? 1 : 0));
}
if (LOOP_ALIGN (0))
{
asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0));
arc_clear_unalign ();
}
gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL);
if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start)
{
if (flag_pic)
{
/* ??? Can do better for when a scratch register
is known. But that would require extra testing. */
arc_clear_unalign ();
return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1@pcl\;sr r0,[3]; LP_END\;pop_s r0";
}
output_asm_insn ((size < 2048
? "lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"),
operands);
output_asm_insn (loop_start
? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START",
operands);
if (TARGET_ARC600 && n_insns < 1)
output_asm_insn ("nop", operands);
return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:";
}
else if (TARGET_ARC600 && n_insns < 3)
{
/* At least four instructions are needed between the setting of LP_COUNT
and the loop end - but the lp instruction qualifies as one. */
rtx_insn *prev = prev_nonnote_insn (insn);
if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT))
output_asm_insn ("nop", operands);
}
return "lp .L__GCC__LP%1";
}
[(set_attr "type" "loop_setup")
(set_attr_alternative "length"
; FIXME: length is usually 4, but we need branch shortening
; to get this right.
; [(if_then_else (match_test "TARGET_ARC600") (const_int 16) (const_int 4))
[(if_then_else (match_test "flag_pic") (const_int 24) (const_int 16))
(if_then_else (match_test "flag_pic") (const_int 28) (const_int 16))
(const_int 0)])]
;; ??? we should really branch shorten this insn, but then we'd
;; need a proper label first. N.B. the end label can not only go out
;; of range when it is far away, but also when it precedes the loop -
;; which, unfortunately, it sometimes does, when the loop "optimizer"
;; messes things up.
)
;; -------------------------------------------------------------------
;; Hardware loop
;; -------------------------------------------------------------------
; operand 0 is the loop count pseudo register
; operand 1 is the label to jump to at the top of the loop
; Use this for the ARC600 and ARC700.
; ??? ARC600 might want to check if the loop has few iteration and only a
; single insn - loop setup is expensive then.
(define_expand "doloop_end"
[(use (match_operand 0 "register_operand" ""))
(use (label_ref (match_operand 1 "" "")))]
"!TARGET_ARC601"
[(parallel [(set (pc)
(if_then_else
(ne (match_operand 0 "" "")
(const_int 1))
(label_ref (match_operand 1 "" ""))
(pc)))
(set (match_dup 0) (plus (match_dup 0) (const_int -1)))
(unspec [(const_int 0)] UNSPEC_ARC_LP)
(clobber (match_dup 2))])]
""
{
/* We could do smaller bivs with biv widening, and wider bivs by having
a high-word counter in an outer loop - but punt on this for now. */
if (GET_MODE (operands[0]) != SImode)
FAIL;
emit_jump_insn (gen_doloop_end_i (operands[0], operands[1], const0_rtx));
DONE;
if (GET_MODE (operands[0]) != SImode)
FAIL;
operands[2] = gen_rtx_SCRATCH (SImode);
})
(define_insn_and_split "doloop_end_i"
(define_insn "arc_lp"
[(unspec:SI [(match_operand:SI 0 "register_operand" "l")]
UNSPEC_ARC_LP)
(use (label_ref (match_operand 1 "" "")))
(use (label_ref (match_operand 2 "" "")))]
""
"lp\\t@%l2\\t; %0:@%l1->@%l2"
[(set_attr "type" "loop_setup")
(set_attr "length" "4")])
;; if by any chance the lp_count is not used, then use an 'r'
;; register, instead of going to memory.
(define_insn "loop_end"
[(set (pc)
(if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m")
(const_int 1))
(if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0")
(const_int 1))
(label_ref (match_operand 1 "" ""))
(pc)))
(set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))
(use (reg:SI LP_START))
(use (reg:SI LP_END))
(use (match_operand 2 "const_int_operand" "n,???Cn0,???X"))
(clobber (match_scratch:SI 3 "=X,X,&????r"))]
(set (match_operand:SI 0 "nonimmediate_operand" "=l!r,m")
(plus (match_dup 2) (const_int -1)))
(unspec [(const_int 0)] UNSPEC_ARC_LP)
(clobber (match_scratch:SI 3 "=X,&r"))]
""
"*
{
rtx_insn *prev = prev_nonnote_insn (insn);
"\\t;%0 %1 %2"
[(set_attr "length" "0")
(set_attr "predicable" "no")
(set_attr "type" "loop_end")])
/* If there is an immediately preceding label, we must output a nop,
lest a branch to that label will fall out of the loop.
??? We could try to avoid this by claiming to have a delay slot if there
is a preceding label, and outputting the delay slot insn instead, if
present.
Or we could have some optimization that changes the source edge to update
the loop count and jump to the loop start instead. */
/* For ARC600, we must also prevent jumps inside the loop and jumps where
the loop counter value is live at the target from being directly at the
loop end. Being sure that the loop counter is dead at the target is
too much hair - we can't rely on data flow information at this point -
so insert a nop for all branches.
The ARC600 also can't read the loop counter in the last insn of a loop. */
if (LABEL_P (prev))
output_asm_insn (\"nop%?\", operands);
return \"\\n.L__GCC__LP%2: ; loop end, start is %1\";
}"
"&& memory_operand (operands[0], SImode)"
[(pc)]
{
emit_move_insn (operands[3], operands[0]);
emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0]));
DONE;
}
[(set_attr "type" "loop_end")
(set (attr "length")
(if_then_else (match_test "LABEL_P (prev_nonnote_insn (insn))")
(const_int 4) (const_int 0)))]
)
;; split pattern for the very slim chance when the loop register is
;; memory.
(define_split
[(set (pc)
(if_then_else (ne (match_operand:SI 0 "memory_operand")
(const_int 1))
(label_ref (match_operand 1 ""))
(pc)))
(set (match_dup 0) (plus (match_dup 0) (const_int -1)))
(unspec [(const_int 0)] UNSPEC_ARC_LP)
(clobber (match_scratch:SI 2))]
"memory_operand (operands[0], SImode)"
[(set (match_dup 2) (match_dup 0))
(set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
(set (match_dup 0) (match_dup 2))
(set (reg:CC CC_REG) (compare:CC (match_dup 2) (const_int 0)))
(set (pc)
(if_then_else (ne (reg:CC CC_REG)
(const_int 0))
(label_ref (match_dup 1))
(pc)))]
"")
; This pattern is generated by arc_reorg when there is no recognizable
; loop start.
(define_insn "*doloop_fallback"
[(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w")
(const_int 1))
(label_ref (match_operand 1 "" ""))
(pc)))
(set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
; avoid fooling the loop optimizer into assuming this is a special insn.
"reload_completed"
"*return get_attr_length (insn) == 8
? \"brne.d %0,1,%1\;sub %0,%0,1\"
: \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";"
[(set (attr "length")
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256))
(le (minus (match_dup 1) (pc)) (const_int 244)))
(const_int 8) (const_int 12)))
(set_attr "type" "brcc_no_delay_slot")
(set_attr "cond" "nocond")]
)
(define_insn "loop_fail"
[(set (reg:SI LP_COUNT)
(plus:SI (reg:SI LP_COUNT) (const_int -1)))
(set (reg:CC_ZN CC_REG)
(compare:CC_ZN (plus:SI (reg:SI LP_COUNT) (const_int -1))
(const_int 0)))]
""
"sub.f%?\\tlp_count,lp_count,1"
[(set_attr "iscompact" "false")
(set_attr "type" "compare")
(set_attr "cond" "set_zn")
(set_attr "length" "4")
(set_attr "predicable" "yes")])
; reload can't make output reloads for jump insns, so we have to do this by hand.
(define_insn "doloop_fallback_m"
[(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r")
(const_int 1))
(label_ref (match_operand 1 "" ""))
(pc)))
(set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))
(set (match_operand:SI 2 "memory_operand" "=m")
(plus:SI (match_dup 0) (const_int -1)))]
; avoid fooling the loop optimizer into assuming this is a special insn.
"reload_completed"
"*return get_attr_length (insn) == 12
? \"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\"
: \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";"
[(set (attr "length")
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252))
(le (minus (match_dup 1) (pc)) (const_int 244)))
(const_int 12) (const_int 16)))
(set_attr "type" "brcc_no_delay_slot")
(set_attr "cond" "nocond")]
)
(define_insn_and_split "dbnz"
[(set (pc)
(if_then_else
(ne (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+r!l,m")
(const_int -1))
(const_int 0))
(label_ref (match_operand 1 "" ""))
(pc)))
(set (match_dup 0)
(plus:SI (match_dup 0)
(const_int -1)))
(clobber (match_scratch:SI 2 "=X,r"))]
"TARGET_V2"
"@
dbnz%#\\t%0,%l1
#"
"TARGET_V2 && reload_completed && memory_operand (operands[0], SImode)"
[(set (match_dup 2) (match_dup 0))
(set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
(set (reg:CC CC_REG) (compare:CC (match_dup 2) (const_int 0)))
(set (match_dup 0) (match_dup 2))
(set (pc) (if_then_else (ge (reg:CC CC_REG)
(const_int 0))
(label_ref (match_dup 1))
(pc)))]
""
[(set_attr "iscompact" "false")
(set_attr "type" "loop_end")
(set_attr "length" "4,20")])
(define_expand "movmemsi"
[(match_operand:BLK 0 "" "")

View file

@ -494,3 +494,28 @@ Specifies the registers that the processor saves on an interrupt entry and exit.
mrgf-banked-regs=
Target RejectNegative Joined Var(arc_deferred_options) Defer
Specifies the number of registers replicated in second register bank on entry to fast interrupt.
mlpc-width=
Target RejectNegative Joined Enum(arc_lpc) Var(arc_lpcwidth) Init(32)
Sets LP_COUNT register width. Possible values are 8, 16, 20, 24, 28, and 32.
Enum
Name(arc_lpc) Type(int)
EnumValue
Enum(arc_lpc) String(8) Value(8)
EnumValue
Enum(arc_lpc) String(16) Value(16)
EnumValue
Enum(arc_lpc) String(20) Value(20)
EnumValue
Enum(arc_lpc) String(24) Value(24)
EnumValue
Enum(arc_lpc) String(28) Value(28)
EnumValue
Enum(arc_lpc) String(32) Value(32)

View file

@ -362,6 +362,8 @@
else if (TARGET_MUL64_SET
&& (REGNO (op) == 57 || REGNO(op) == 58 || REGNO(op) == 59 ))
return 0;
else if (REGNO (op) == LP_COUNT)
return 1;
else
return dest_reg_operand (op, mode);
case SUBREG :

View file

@ -614,7 +614,7 @@ Objective-C and Objective-C++ Dialects}.
-mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
-mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
-mlong-calls -mmedium-calls -msdata -mirq-ctrl-saved @gol
-mrgf-banked-regs -G @var{num} @gol
-mrgf-banked-regs -mlpc-width=@var{width} -G @var{num} @gol
-mvolatile-cache -mtp-regno=@var{regno} @gol
-malign-call -mauto-modify-reg -mbbit-peephole -mno-brcc @gol
-mcase-vector-pcrel -mcompact-casesi -mno-cond-exec -mearly-cbranchsi @gol
@ -14779,6 +14779,18 @@ registers to avoid memory transactions during interrupt entry and exit
sequences. Use this option when you are using fast interrupts in an
ARC V2 family processor. Permitted values are 4, 8, 16, and 32.
@item -mlpc-width=@var{width}
@opindex mlpc-width
Specify the width of the @code{lp_count} register. Valid values for
@var{width} are 8, 16, 20, 24, 28 and 32 bits. The default width is
fixed to 32 bits. If the width is less than 32, the compiler does not
attempt to transform loops in your program to use the zero-delay loop
mechanism unless it is known that the @code{lp_count} register can
hold the required loop-counter value. Depending on the width
specified, the compiler and run-time library might continue to use the
loop mechanism for various needs. This option defines macro
@code{__ARC_LPC_WIDTH__} with the value of @var{width}.
@end table
The following options are passed through to the assembler, and also

View file

@ -1,3 +1,7 @@
2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
* gcc.target/arc/loop-1.c: Deleted.
2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
* gcc.target/arc/arc.exp: Test also cpp files.

View file

@ -1,45 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* This case would fail to make use of the zero-overhead loop
instruction at one time due to a bug. */
extern char a[];
struct some_t
{
struct
{
int aaa;
short bbb;
char ccc;
char ddd;
} ppp[8];
int www[1];
};
int b;
void
some_function ()
{
struct some_t *tmp = (struct some_t *) a;
while ((*tmp).ppp[b].ccc)
while(0);
for (; b; b++)
{
if (tmp->ppp[b].ccc)
{
int c = tmp->ppp[b].bbb;
int d = tmp->ppp[b].aaa;
int e = d - tmp->www[c];
if (e)
tmp->ppp[b].ddd = 1;
}
}
}
/* { dg-final { scan-assembler "\[^\n\]+lp \\.L__GCC__" } } */