re PR rtl-optimization/45352 (ICE: in reset_sched_cycles_in_current_ebb, at sel-sched.c:7058)
PR rtl-optimization/45352 * sel-sched.c (find_best_expr): Do not set pneed_stall when the variable_issue hook is not implemented. (fill_insns): Remove dead variable stall_iterations. (init_seqno_1): Force EBB start for resetting sched cycles on any successor blocks of the rescheduled region. (sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit. (reset_sched_cycles_in_current_ebb): Add debug printing. New variable issued_insns. Advance state when we have issued issue_rate insns. gcc.dg/pr45352.c, gcc.dg/pr45352-1.c, gcc.dg/pr45352-2.c: New tests. gcc.target/i386/pr45352.c, gcc.target/i386/pr45352-1.c, gcc.target/i386/pr45352-2.c: New tests. From-SVN: r166429
This commit is contained in:
parent
b1d2d6b16e
commit
06f0c25f25
9 changed files with 251 additions and 11 deletions
|
@ -1,3 +1,16 @@
|
|||
2010-11-08 Andrey Belevantsev <abel@ispras.ru>
|
||||
|
||||
PR rtl-optimization/45352
|
||||
* sel-sched.c (find_best_expr): Do not set pneed_stall when
|
||||
the variable_issue hook is not implemented.
|
||||
(fill_insns): Remove dead variable stall_iterations.
|
||||
(init_seqno_1): Force EBB start for resetting sched cycles on any
|
||||
successor blocks of the rescheduled region.
|
||||
(sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit.
|
||||
(reset_sched_cycles_in_current_ebb): Add debug printing.
|
||||
New variable issued_insns. Advance state when we have issued
|
||||
issue_rate insns.
|
||||
|
||||
2010-11-08 Basile Starynkevitch <basile@starynkevitch.net>
|
||||
|
||||
* gengtype (main): Get here's position using POS_HERE macro for
|
||||
|
|
|
@ -4403,7 +4403,8 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence,
|
|||
{
|
||||
can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
|
||||
can_issue_more);
|
||||
if (can_issue_more == 0)
|
||||
if (targetm.sched.variable_issue
|
||||
&& can_issue_more == 0)
|
||||
*pneed_stall = 1;
|
||||
}
|
||||
|
||||
|
@ -5511,7 +5512,7 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp)
|
|||
blist_t *bnds_tailp1, *bndsp;
|
||||
expr_t expr_vliw;
|
||||
int need_stall;
|
||||
int was_stall = 0, scheduled_insns = 0, stall_iterations = 0;
|
||||
int was_stall = 0, scheduled_insns = 0;
|
||||
int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
|
||||
int max_stall = pipelining_p ? 1 : 3;
|
||||
bool last_insn_was_debug = false;
|
||||
|
@ -5530,16 +5531,15 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp)
|
|||
do
|
||||
{
|
||||
expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
|
||||
if (!expr_vliw && need_stall)
|
||||
if (! expr_vliw && need_stall)
|
||||
{
|
||||
/* All expressions required a stall. Do not recompute av sets
|
||||
as we'll get the same answer (modulo the insns between
|
||||
the fence and its boundary, which will not be available for
|
||||
pipelining). */
|
||||
gcc_assert (! expr_vliw && stall_iterations < 2);
|
||||
was_stall++;
|
||||
/* If we are going to stall for too long, break to recompute av
|
||||
pipelining).
|
||||
If we are going to stall for too long, break to recompute av
|
||||
sets and bring more insns for pipelining. */
|
||||
was_stall++;
|
||||
if (need_stall <= 3)
|
||||
stall_for_cycles (fence, need_stall);
|
||||
else
|
||||
|
@ -6712,6 +6712,8 @@ init_seqno_1 (basic_block bb, sbitmap visited_bbs, bitmap blocks_to_reschedule)
|
|||
|
||||
init_seqno_1 (succ, visited_bbs, blocks_to_reschedule);
|
||||
}
|
||||
else if (blocks_to_reschedule)
|
||||
bitmap_set_bit (forced_ebb_heads, succ->index);
|
||||
}
|
||||
|
||||
for (insn = BB_END (bb); insn != note; insn = PREV_INSN (insn))
|
||||
|
@ -6966,6 +6968,7 @@ reset_sched_cycles_in_current_ebb (void)
|
|||
int last_clock = 0;
|
||||
int haifa_last_clock = -1;
|
||||
int haifa_clock = 0;
|
||||
int issued_insns = 0;
|
||||
insn_t insn;
|
||||
|
||||
if (targetm.sched.init)
|
||||
|
@ -7020,7 +7023,9 @@ reset_sched_cycles_in_current_ebb (void)
|
|||
haifa_cost = cost;
|
||||
after_stall = 1;
|
||||
}
|
||||
|
||||
if (haifa_cost == 0
|
||||
&& issued_insns == issue_rate)
|
||||
haifa_cost = 1;
|
||||
if (haifa_cost > 0)
|
||||
{
|
||||
int i = 0;
|
||||
|
@ -7028,6 +7033,7 @@ reset_sched_cycles_in_current_ebb (void)
|
|||
while (haifa_cost--)
|
||||
{
|
||||
advance_state (curr_state);
|
||||
issued_insns = 0;
|
||||
i++;
|
||||
|
||||
if (sched_verbose >= 2)
|
||||
|
@ -7047,6 +7053,8 @@ reset_sched_cycles_in_current_ebb (void)
|
|||
}
|
||||
|
||||
haifa_clock += i;
|
||||
if (sched_verbose >= 2)
|
||||
sel_print ("haifa clock: %d\n", haifa_clock);
|
||||
}
|
||||
else
|
||||
gcc_assert (haifa_cost == 0);
|
||||
|
@ -7060,21 +7068,27 @@ reset_sched_cycles_in_current_ebb (void)
|
|||
&sort_p))
|
||||
{
|
||||
advance_state (curr_state);
|
||||
issued_insns = 0;
|
||||
haifa_clock++;
|
||||
if (sched_verbose >= 2)
|
||||
{
|
||||
sel_print ("advance_state (dfa_new_cycle)\n");
|
||||
debug_state (curr_state);
|
||||
sel_print ("haifa clock: %d\n", haifa_clock + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (real_insn)
|
||||
{
|
||||
cost = state_transition (curr_state, insn);
|
||||
issued_insns++;
|
||||
|
||||
if (sched_verbose >= 2)
|
||||
debug_state (curr_state);
|
||||
|
||||
{
|
||||
sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn),
|
||||
haifa_clock + 1);
|
||||
debug_state (curr_state);
|
||||
}
|
||||
gcc_assert (cost < 0);
|
||||
}
|
||||
|
||||
|
@ -7518,7 +7532,7 @@ sel_sched_region_1 (void)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (bitmap_clear_bit (blocks_to_reschedule, bb->index))
|
||||
if (bitmap_bit_p (blocks_to_reschedule, bb->index))
|
||||
{
|
||||
flist_tail_init (new_fences);
|
||||
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2010-11-08 Andrey Belevantsev <abel@ispras.ru>
|
||||
|
||||
PR rtl-optimization/45352
|
||||
* gcc.dg/pr45352.c, gcc.dg/pr45352-1.c, gcc.dg/pr45352-2.c: New tests.
|
||||
* gcc.target/i386/pr45352.c, gcc.target/i386/pr45352-1.c,
|
||||
gcc.target/i386/pr45352-2.c: New tests.
|
||||
|
||||
2010-11-06 Iain Sandoe <iains@gcc.gnu.org>
|
||||
|
||||
PR libobjc/36610
|
||||
|
|
13
gcc/testsuite/gcc.dg/pr45352-1.c
Normal file
13
gcc/testsuite/gcc.dg/pr45352-1.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O3 -fschedule-insns -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining -funroll-loops -fprefetch-loop-arrays" } */
|
||||
|
||||
/* Compile-only body for the PR 45352 test: copy 256 floats from PC into the
   local arrays B and C, then store the element-wise product B[i] * C[i]
   into PA.  Nothing is returned; PA is the only output.  */
void main1 (float *pa, float *pc)
|
||||
{
|
||||
int i;
|
||||
float b[256];
|
||||
float c[256];
|
||||
/* First loop: both locals receive the same value from PC.  */
for (i = 0; i < 256; i++)
|
||||
b[i] = c[i] = pc[i];
|
||||
/* Second loop: consume the locals and write the result through PA.  */
for (i = 0; i < 256; i++)
|
||||
pa[i] = b[i] * c[i];
|
||||
}
|
17
gcc/testsuite/gcc.dg/pr45352-2.c
Normal file
17
gcc/testsuite/gcc.dg/pr45352-2.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O1 -freorder-blocks -fschedule-insns2 -funswitch-loops -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
|
||||
/* Compile-only body for the PR 45352 test.  S is used as a flat array of
   ints; indices 0, 1, 3, 4, 6, 7 and 8 are accessed.  After copying s[1]
   into s[0], the loop repeatedly subtracts s[8] from s[6] until the two are
   equal, counting iterations in s[7] and conditionally accumulating into
   s[3] and s[4].  */
void
|
||||
foo1 (int *s)
|
||||
{
|
||||
s[0] = s[1];
|
||||
/* Loop while s[6] differs from s[8].  */
while (s[6] - s[8])
|
||||
{
|
||||
s[6] -= s[8];
|
||||
if (s[8] || s[0])
|
||||
{
|
||||
s[3] += s[0];
|
||||
s[4] += s[1];
|
||||
}
|
||||
s[7]++;
|
||||
}
|
||||
}
|
24
gcc/testsuite/gcc.dg/pr45352.c
Normal file
24
gcc/testsuite/gcc.dg/pr45352.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate" } */
|
||||
|
||||
/* Step helper used as the increment expression of the loop in df_md_alloc:
   shift the value pointed to by BI right by one bit and add one to the
   value pointed to by BIT_NO.  Both arguments are updated in place.  */
static inline void
|
||||
bmp_iter_next (int *bi, int *bit_no)
|
||||
{
|
||||
*bi >>= 1;
|
||||
*bit_no += 1;
|
||||
}
|
||||
|
||||
int bmp_iter_set (int *bi, int *bit_no);
|
||||
void bitmap_initialize_stat (int, ...);
|
||||
void bitmap_clear (void);
|
||||
|
||||
/* Compile-only body for the PR 45352 test.  Iterates while the external
   bmp_iter_set (&bi, &bb_index) returns nonzero, stepping with
   bmp_iter_next.  Each iteration calls bitmap_clear () when BB_INFO is
   non-null and bitmap_initialize_stat (0) otherwise.  BI and BB_INFO are
   by-value parameters, so the branch taken is the same on every
   iteration.  */
void
|
||||
df_md_alloc (int bi, int bb_index, void *bb_info)
|
||||
{
|
||||
for (; bmp_iter_set (&bi, &bb_index); bmp_iter_next (&bi, &bb_index))
|
||||
|
||||
if (bb_info)
|
||||
bitmap_clear ();
|
||||
else
|
||||
bitmap_initialize_stat (0);
|
||||
}
|
19
gcc/testsuite/gcc.target/i386/pr45352-1.c
Normal file
19
gcc/testsuite/gcc.target/i386/pr45352-1.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mtune=amdfam10 -O3 -fpeel-loops -fselective-scheduling2 -fsel-sched-pipelining -fPIC" } */
|
||||
|
||||
static int FIR_Tab_16[16][16];
|
||||
|
||||
/* Compile-only body for the PR 45352 test.  For each of W iterations,
   accumulate Sums[k] += FIR_Tab_16[i][k] * Src[i] over a 16x16 table and
   then store Sums[i] + Src[i] into Dst[i] for i in 0..15.  DST and SRC are
   not advanced between iterations, and BPS and RND are unused in the
   body.  */
void
|
||||
V_Pass_Avrg_16_C_ref (int *Dst, int *Src, int W, int BpS, int Rnd)
|
||||
{
|
||||
while (W-- > 0)
|
||||
{
|
||||
int i, k;
|
||||
/* Empty-brace initializer zeroes the accumulator on every pass.  */
int Sums[16] = { };
|
||||
for (i = 0; i < 16; ++i)
|
||||
for (k = 0; k < 16; ++k)
|
||||
Sums[k] += FIR_Tab_16[i][k] * Src[i];
|
||||
for (i = 0; i < 16; ++i)
|
||||
Dst[i] = Sums[i] + Src[i];
|
||||
}
|
||||
}
|
108
gcc/testsuite/gcc.target/i386/pr45352-2.c
Normal file
108
gcc/testsuite/gcc.target/i386/pr45352-2.c
Normal file
|
@ -0,0 +1,108 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -mtune=amdfam10 -fexpensive-optimizations -fgcse -foptimize-register-move -freorder-blocks -fschedule-insns2 -funswitch-loops -fgcse-las -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
|
||||
|
||||
typedef char uint8_t;
|
||||
typedef uint32_t;
|
||||
typedef vo_frame_t;
|
||||
struct vo_frame_s
|
||||
{
|
||||
uint8_t base[3];
|
||||
int pitches[3];};
|
||||
typedef struct
|
||||
{
|
||||
void
|
||||
(*proc_macro_block)
|
||||
(void);
|
||||
}
|
||||
xine_xvmc_t;
|
||||
typedef struct
|
||||
{
|
||||
uint8_t ref[2][3];
|
||||
int pmv;
|
||||
}
|
||||
motion_t;
|
||||
typedef struct
|
||||
{
|
||||
uint32_t bitstream_buf;
|
||||
int bitstream_bits;
|
||||
uint8_t * bitstream_ptr;
|
||||
uint8_t dest[3];
|
||||
int pitches[3];
|
||||
int offset;
|
||||
motion_t b_motion;
|
||||
motion_t f_motion;
|
||||
int v_offset;
|
||||
int coded_picture_width;
|
||||
int picture_structure;
|
||||
struct vo_frame_s *current_frame;}
|
||||
picture_t;
|
||||
typedef struct
|
||||
{
|
||||
int xvmc_last_slice_code;}
|
||||
mpeg2dec_accel_t;
|
||||
/* Store START into PICTURE->bitstream_ptr.  The function is declared with
   no return type (implicit int) and has no return statement, yet its
   caller mpeg2_xvmc_slice uses the result.
   NOTE(review): this looks like an intentionally reduced testcase; the file
   is compile-only (dg-do compile), so keep it as-is.  */
static bitstream_init (picture_t * picture, void *start)
|
||||
{
|
||||
picture->bitstream_ptr = start;
|
||||
}
|
||||
/* Compile-only body for the PR 45352 test.  Declared with implicit int
   return type and no return statement; CODE is unused.  The body copies
   pitches from PICTURE->current_frame, writes two f_motion.ref entries,
   runs a loop keyed on PICTURE->bitstream_buf that accumulates OFFSET, and
   finally reduces PICTURE->offset by coded_picture_width while advancing
   dest[] and v_offset.
   NOTE(review): forward_reference_frame is read without ever being
   assigned, and PICTURE is dereferenced before the `if (picture)' check;
   this looks like an automatically reduced testcase whose exact shape is
   what triggers the scheduler ICE, so do not "fix" it.  */
static slice_xvmc_init (picture_t * picture, int code)
|
||||
{
|
||||
int offset;
|
||||
struct vo_frame_s *forward_reference_frame;
|
||||
/* OFFSET starts as a 0/1 flag: 1 when picture_structure equals 2.  */
offset = picture->picture_structure == 2;
|
||||
picture->pitches[0] = picture->current_frame->pitches[0];
|
||||
picture->pitches[1] = picture->current_frame->pitches[1];
|
||||
if (picture)
|
||||
picture->f_motion.ref
|
||||
[0]
|
||||
[0]
|
||||
= forward_reference_frame->base + (offset ? picture->pitches[0] : 0);
|
||||
picture->f_motion.ref[0][1] = (offset);
|
||||
if (picture->picture_structure)
|
||||
picture->pitches[0] <<= picture->pitches[1] <<= 1;
|
||||
offset = 0;
|
||||
/* Exits only once bitstream_buf reaches 0x08000000; the `case 8' arm ORs
   more bits into bitstream_buf and adds 33 to OFFSET.  */
while (1)
|
||||
{
|
||||
if (picture->bitstream_buf >= 0x08000000)
|
||||
break;
|
||||
switch (picture->bitstream_buf >> 12)
|
||||
{
|
||||
case 8:
|
||||
offset += 33;
|
||||
picture->bitstream_buf
|
||||
|=
|
||||
picture->bitstream_ptr[1] << picture->bitstream_bits;
|
||||
}
|
||||
}
|
||||
picture->offset = (offset);
|
||||
/* Repeatedly subtract coded_picture_width from offset while the result
   stays non-negative, adding 16 to v_offset each time.  */
while (picture->offset - picture->coded_picture_width >= 0)
|
||||
{
|
||||
picture->offset -= picture->coded_picture_width;
|
||||
if (picture->current_frame)
|
||||
{
|
||||
picture->dest[0] += picture->pitches[0];
|
||||
picture->dest[1] += picture->pitches[1];
|
||||
}
|
||||
picture->v_offset += 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* Entry point of the compile-only PR 45352 test.  Calls bitstream_init and
   slice_xvmc_init on PICTURE, then enters a loop that breaks immediately
   when PICTURE is non-null; otherwise it switches on
   PICTURE->bitstream_buf, and the `case 8' arm records CODE in
   ACCEL->xvmc_last_slice_code, calls XVMC->proc_macro_block and spins
   while MBA_INC is nonzero.
   NOTE(review): XVMC is initialized from the int result of bitstream_init,
   which has no return statement, and BUFFER (a uint8_t) is passed where a
   pointer is expected; this looks like an automatically reduced testcase,
   so leave the constructs untouched.  */
void
|
||||
mpeg2_xvmc_slice
|
||||
(mpeg2dec_accel_t * accel, picture_t * picture, int code, uint8_t buffer,int mba_inc)
|
||||
{
|
||||
xine_xvmc_t * xvmc = bitstream_init (picture, buffer);
|
||||
slice_xvmc_init (picture, code);
|
||||
while (1)
|
||||
{
|
||||
if (picture)
|
||||
break;
|
||||
switch (picture->bitstream_buf)
|
||||
{
|
||||
case 8:
|
||||
mba_inc += accel->xvmc_last_slice_code = code;
|
||||
xvmc->proc_macro_block ();
|
||||
/* Empty-bodied loop: never terminates if MBA_INC is nonzero here.  */
while (mba_inc)
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
25
gcc/testsuite/gcc.target/i386/pr45352.c
Normal file
25
gcc/testsuite/gcc.target/i386/pr45352.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=amdfam10 -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
|
||||
|
||||
struct S
|
||||
{
|
||||
struct
|
||||
{
|
||||
int i;
|
||||
} **p;
|
||||
int x;
|
||||
int y;
|
||||
};
|
||||
|
||||
extern int baz (void);
|
||||
extern int bar (void *, int, int);
|
||||
|
||||
/* Compile-only body for the PR 45352 test.  Makes two passes over the
   first S->x entries of S->p: the first calls the external
   bar (s->p[i], baz (), s->y) for each entry, the second increments the
   `i' field of each pointed-to element.  S->x is re-read in each loop
   condition.  */
void
|
||||
foo (struct S *s)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < s->x; i++)
|
||||
bar (s->p[i], baz (), s->y);
|
||||
for (i = 0; i < s->x; i++)
|
||||
s->p[i]->i++;
|
||||
}
|
Loading…
Add table
Reference in a new issue