diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-87.c b/gcc/testsuite/gcc.dg/Warray-bounds-87.c index a49874df5da..a5457807c3a 100644 --- a/gcc/testsuite/gcc.dg/Warray-bounds-87.c +++ b/gcc/testsuite/gcc.dg/Warray-bounds-87.c @@ -33,7 +33,7 @@ static unsigned int h (int i, int j) case 9: return j; case 10: - return a[i]; // { dg-bogus "-Warray-bounds" "pr101671" { xfail *-*-* } } + return a[i]; // { dg-bogus "-Warray-bounds" "pr101671" } } return 0; } diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c b/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c index 0acf48810c1..62176bdaee8 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c @@ -45,7 +45,7 @@ int pamark(void) { if (curbp->b_amark == (AMARK *)NULL) curbp->b_amark = p; else - last->m_next = p; /* { dg-warning "dereference of NULL 'last'" "deref" { xfail *-*-* } } */ + last->m_next = p; /* { dg-warning "dereference of NULL 'last'" "deref" } */ } p->m_name = (char)c; /* { dg-bogus "leak of 'p'" "bogus leak" } */ diff --git a/gcc/testsuite/gcc.dg/graphite/pr69728.c b/gcc/testsuite/gcc.dg/graphite/pr69728.c index 69e28318aaf..a6f385749c2 100644 --- a/gcc/testsuite/gcc.dg/graphite/pr69728.c +++ b/gcc/testsuite/gcc.dg/graphite/pr69728.c @@ -24,6 +24,4 @@ fn1 () run into scheduling issues before here, not being able to handle empty domains. */ -/* XFAILed by fix for PR86865. */ - -/* { dg-final { scan-tree-dump "loop nest optimized" "graphite" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "loop nest optimized" "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c b/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c index 925ae306903..41c91b97b57 100644 --- a/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c +++ b/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c @@ -17,4 +17,4 @@ void dsyr2k(int N) { #pragma endscop } -/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-dsyrk.c b/gcc/testsuite/gcc.dg/graphite/scop-dsyrk.c index b748946fabb..e01a517be11 100644 --- a/gcc/testsuite/gcc.dg/graphite/scop-dsyrk.c +++ b/gcc/testsuite/gcc.dg/graphite/scop-dsyrk.c @@ -19,4 +19,4 @@ void dsyrk(int N) #pragma endscop } -/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-8.c b/gcc/testsuite/gcc.dg/loop-8.c index 90ea1c45524..a685fc25056 100644 --- a/gcc/testsuite/gcc.dg/loop-8.c +++ b/gcc/testsuite/gcc.dg/loop-8.c @@ -11,18 +11,23 @@ f (int *a, int *b) { int i; - for (i = 0; i < 100; i++) + i = 100; + if (i > 0) { - int d = 42; + do + { + int d = 42; - a[i] = d; - if (i % 2) - d = i; - b[i] = d; + a[i] = d; + if (i % 2) + d = i; + b[i] = d; + ++i; + } + while (i < 100); } } /* Load of 42 is moved out of the loop, introducing a new pseudo register. */ -/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ /* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */ diff --git a/gcc/testsuite/gcc.dg/shrink-wrap-loop.c b/gcc/testsuite/gcc.dg/shrink-wrap-loop.c index 6e1be8937fe..ddc99e6b75a 100644 --- a/gcc/testsuite/gcc.dg/shrink-wrap-loop.c +++ b/gcc/testsuite/gcc.dg/shrink-wrap-loop.c @@ -1,58 +1,6 @@ /* { dg-do compile { target { { { i?86-*-* x86_64-*-* } && lp64 } || { arm_thumb2 } } } } */ /* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */ -/* -Our new threader is threading things a bit too early, and causing the -testcase in gcc.dg/shrink-wrap-loop.c to fail. - - The gist is this BB inside a loop: - - : - # p_2 = PHI - if (p_2 != 0B) - goto ; [INV] - else - goto ; [INV] - -Our threader can move this check outside of the loop (good). This is -done before branch probabilities are calculated and causes the probs -to be calculated as: - - [local count: 216361238]: - if (p2_6(D) != 0B) - goto ; [54.59%] - else - goto ; [45.41%] - -Logically this seems correct to me. A simple check outside of a loop -should slightly but not overwhelmingly favor a non-zero value. - -Interestingly however, the old threader couldn't get this, but the IL -ended up identical, albeit with different probabilities. What happens -is that, because the old code could not thread this, the p2 != 0 check -would remain inside the loop and probs would be calculated thusly: - - [local count: 1073741824]: - # p_2 = PHI - if (p_2 != 0B) - goto ; [94.50%] - else - goto ; [5.50%] - -Then when the loop header copying pass ("ch") shuffled things around, -the IL would end up identical to my early threader code, but with the -probabilities would remain as 94.5/5.5. - -The above discrepancy causes the RTL ifcvt pass to generate different -code, and by the time we get to the shrink wrapping pass, things look -sufficiently different such that the legacy code can actually shrink -wrap, whereas our new code does not. - -IMO, if the loop-ch pass moves conditionals outside of a loop, the -probabilities should be adjusted, but that does mean the shrink wrap -won't happen for this contrived testcase. - */ - int foo (int *p1, int *p2); int @@ -68,4 +16,4 @@ test (int *p1, int *p2) return 1; } -/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" { xfail *-*-* } } } */ +/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c index b55a533e374..f8a6495cbaa 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-20040816-1.c @@ -39,4 +39,4 @@ int main1 () which is folded by vectorizer. Both outgoing edges must have probability 100% so the resulting profile match after folding. */ /* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 2 "ifcvt" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c b/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c index 51b3b7ac755..43f046edabe 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c @@ -35,10 +35,7 @@ void foo (void) /* First, we should simplify the bits < 0 test within the loop. */ /* { dg-final { scan-tree-dump-times "Simplified relational" 1 "evrp" } } */ -/* Second, we should thread the edge out of the loop via the break - statement. We also realize that the final bytes == 0 test is useless, - and thread over it. We also know that toread != 0 is useless when - entering while loop and thread over it. */ -/* { dg-final { scan-tree-dump-times "Threaded jump" 3 "vrp-thread1" } } */ +/* We used to check for 3 threaded jumps here, but they all would + rotate the loop. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c b/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c index 2caa1f532ea..764b3fe2e80 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr59597.c @@ -56,11 +56,7 @@ main (int argc, char argv[]) return crc; } -/* Previously we had 3 jump threads, but one of them crossed loops. - The reason the old threader was allowing it, was because there was - an ASSERT_EXPR getting in the way. Without the ASSERT_EXPR, we - have an empty pre-header block as the final block in the thread, - which the threader will simply join with the next block which *is* - in a different loop. */ -/* { dg-final { scan-tree-dump-times "Registering jump thread" 2 "vrp-thread1" } } */ +/* None of the threads we can get in vrp-thread1 are valid. They all + cross or rotate loops. */ +/* { dg-final { scan-tree-dump-not "Registering jump thread" "vrp-thread1" } } */ /* { dg-final { scan-tree-dump-not "joiner" "vrp-thread1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c b/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c index a2386ba19f0..eab3a25928e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-ffast-math -O3 -fdump-tree-vrp-thread1-details" } */ +/* { dg-options "-ffast-math -O3 -fdump-tree-dom3-details" } */ int I = 50, J = 50; int S, L; @@ -39,4 +39,8 @@ void foo (int K) bar (LD, SD); } } -/* { dg-final { scan-tree-dump-times "Threaded jump " 2 "vrp-thread1" } } */ + +/* We used to get 1 vrp-thread1 candidates here, but they now get + deferred until after loop opts are done, because they were rotating + loops. */ +/* { dg-final { scan-tree-dump-times "Threaded jump " 2 "dom3" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c index 18f7aab2be7..f2a5e78e6be 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c @@ -123,8 +123,7 @@ enum STATES FMS( u8 **in , u32 *transitions) { aarch64 has the highest CASE_VALUES_THRESHOLD in GCC. It's high enough to change decisions in switch expansion which in turn can expose new jump threading opportunities. Skip the later tests on aarch64. */ -/* { dg-final { scan-tree-dump "Jumps threaded: \[7-9\]" "thread1" } } */ -/* { dg-final { scan-tree-dump-times "Invalid sum" 1 "thread1" } } */ +/* { dg-final { scan-tree-dump "Jumps threaded: \[7-9\]" "thread2" } } */ /* { dg-final { scan-tree-dump-not "optimizing for size" "thread1" } } */ /* { dg-final { scan-tree-dump-not "optimizing for size" "thread2" } } */ /* { dg-final { scan-tree-dump-not "optimizing for size" "thread3" { target { ! aarch64*-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-18.c deleted file mode 100644 index 0246ebf3c63..00000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-18.c +++ /dev/null @@ -1,27 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp-thread1-details -std=gnu89 --param logical-op-non-short-circuit=0" } */ - -#include "ssa-dom-thread-4.c" - -/* On targets that define LOGICAL_OP_NON_SHORT_CIRCUIT to 0, we split both - "a_elt || b_elt" and "b_elt && kill_elt" into two conditions each, - rather than using "(var1 != 0) op (var2 != 0)". Also, as on other targets, - we duplicate the header of the inner "while" loop. There are then - 4 threading opportunities: - - 1x "!a_elt && b_elt" in the outer "while" loop - -> the start of the inner "while" loop, - skipping the known-true "b_elt" in the first condition. - 1x "!b_elt" in the first condition - -> the outer "while" loop's continuation point, - skipping the known-false "b_elt" in the second condition. - 2x "kill_elt->indx >= b_elt->indx" in the first "while" loop - -> "kill_elt->indx == b_elt->indx" in the second condition, - skipping the known-true "b_elt && kill_elt" in the second - condition. - - All the cases are picked up by VRP1 as jump threads. */ - -/* There used to be 6 jump threads found by thread1, but they all - depended on threading through distinct loops in ethread. */ -/* { dg-final { scan-tree-dump-times "Threaded" 2 "vrp-thread1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2a.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2a.c deleted file mode 100644 index 8f0a12c12ee..00000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2a.c +++ /dev/null @@ -1,21 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp-thread1-stats -fdump-tree-dom2-stats" } */ - -void bla(); - -/* In the following case, we should be able to thread edge through - the loop header. */ - -void thread_entry_through_header (void) -{ - int i; - - for (i = 0; i < 170; i++) - bla (); -} - -/* There's a single jump thread that should be handled by the VRP - jump threading pass. */ -/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "vrp-thread1"} } */ -/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 0 "vrp-thread1"} } */ -/* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c index 46e464ff26a..9cd463571c4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-vrp-thread1-details -fdump-tree-dom2-details -std=gnu89 --param logical-op-non-short-circuit=1" } */ +/* { dg-options "-O2 -fdump-tree-vrp-thread2-details -fdump-tree-dom2-details -std=gnu89 --param logical-op-non-short-circuit=1" } */ struct bitmap_head_def; typedef struct bitmap_head_def *bitmap; typedef const struct bitmap_head_def *const_bitmap; @@ -53,10 +53,8 @@ bitmap_ior_and_compl (bitmap dst, const_bitmap a, const_bitmap b, return changed; } -/* The block starting the second conditional has 3 incoming edges, - we should thread all three, but due to a bug in the threading - code we missed the edge when the first conditional is false - (b_elt is zero, which means the second conditional is always - zero. VRP1 catches all three. */ -/* { dg-final { scan-tree-dump-times "Registering jump thread" 2 "vrp-thread1" } } */ -/* { dg-final { scan-tree-dump-times "Path crosses loops" 1 "vrp-thread1" } } */ +/* We used to catch 3 jump threads in vrp-thread1, but they all + rotated the loop, so they were disallowed. This in turn created + other opportunities for the other threaders which result in the the + post-loop threader (vrp-thread2) catching more. */ +/* { dg-final { scan-tree-dump-times "Registering jump thread" 5 "vrp-thread2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c deleted file mode 100644 index b0a7d423475..00000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c +++ /dev/null @@ -1,44 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread3-details" } */ - -/* { dg-final { scan-tree-dump-times "Registering jump" 6 "thread1" } } */ -/* { dg-final { scan-tree-dump-times "Registering jump" 1 "thread3" } } */ - -int sum0, sum1, sum2, sum3; -int foo (char *s, char **ret) -{ - int state=0; - char c; - - for (; *s && state != 4; s++) - { - c = *s; - if (c == '*') - { - s++; - break; - } - switch (state) - { - case 0: - if (c == '+') - state = 1; - else if (c != '-') - sum0+=c; - break; - case 1: - if (c == '+') - state = 2; - else if (c == '-') - state = 0; - else - sum1+=c; - break; - default: - break; - } - - } - *ret = s; - return state; -} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c index 16abcde5053..1da00a691c8 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c @@ -1,15 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 12" "thread1" } } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 5" "thread3" { target { ! aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump "Jumps threaded: 12" "thread3" } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2" } } */ /* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC. It's high enough to change decisions in switch expansion which in turn can expose new jump threading opportunities. Skip the later tests on aarch64. */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" { target { ! aarch64*-*-* } } } } */ -/* { dg-final { scan-tree-dump-not "Jumps threaded" "vrp2" { target { ! aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump-not "Jumps threaded" "vrp-thread2" { target { ! aarch64*-*-* } } } } */ enum STATE { S0=0, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-invalid.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-invalid.c new file mode 100644 index 00000000000..bd56a62a4b4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-invalid.c @@ -0,0 +1,102 @@ +// { dg-do compile } +// { dg-options "-O2 -fgimple -fdump-statistics" } +// +// This is a collection of seemingly threadble paths that should not be allowed. + +void foobar (int); + +// Possible thread from 2->4->3, but it would rotate the loop. +void __GIMPLE (ssa) +f1 () +{ + int i; + + // Pre-header. + __BB(2): + goto __BB4; + + // Latch. + __BB(3): + foobar (i_1); + i_5 = i_1 + 1; + goto __BB4; + + __BB(4,loop_header(1)): + i_1 = __PHI (__BB2: 0, __BB3: i_5); + if (i_1 != 101) + goto __BB3; + else + goto __BB5; + + __BB(5): + return; + +} + +// Possible thread from 2->3->5 but threading through the empty latch +// would create a non-empty latch. +void __GIMPLE (ssa) +f2 () +{ + int i; + + // Pre-header. + __BB(2): + goto __BB3; + + __BB(3,loop_header(1)): + i_8 = __PHI (__BB5: i_5, __BB2: 0); + foobar (i_8); + i_5 = i_8 + 1; + if (i_5 != 256) + goto __BB5; + else + goto __BB4; + + // Latch. + __BB(5): + goto __BB3; + + __BB(4): + return; + +} + +// Possible thread from 3->5->6->3 but this would thread through the +// header but not exit the loop. +int __GIMPLE (ssa) +f3 (int a) +{ + int i; + + __BB(2): + goto __BB6; + + __BB(3): + if (i_1 != 0) + goto __BB4; + else + goto __BB5; + + __BB(4): + foobar (5); + goto __BB5; + + // Latch. + __BB(5): + i_7 = i_1 + 1; + goto __BB6; + + __BB(6,loop_header(1)): + i_1 = __PHI (__BB2: 1, __BB5: i_7); + if (i_1 <= 99) + goto __BB3; + else + goto __BB7; + + __BB(7): + return; + +} + +// { dg-final { scan-tree-dump-not "Jumps threaded" "statistics" } } diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-16.c b/gcc/testsuite/gcc.dg/vect/bb-slp-16.c index e68a9b62535..4fc176dde84 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-16.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-16.c @@ -16,41 +16,52 @@ main1 (int dummy) unsigned int *pin = &in[0]; unsigned int *pout = &out[0]; unsigned int a = 0; - - for (i = 0; i < N; i++) + + i = N; + if (i > 0) { - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - *pout++ = *pin++ + a; - if (arr[i] = i) - a = i; - else - a = 2; + do + { + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + *pout++ = *pin++ + a; + if (arr[i] = i) + a = i; + else + a = 2; + } + while (i < N); } a = 0; - /* check results: */ - for (i = 0; i < N; i++) + /* check results: */ + i = N; + if (i > 0) { - if (out[i*8] != in[i*8] + a - || out[i*8 + 1] != in[i*8 + 1] + a - || out[i*8 + 2] != in[i*8 + 2] + a - || out[i*8 + 3] != in[i*8 + 3] + a - || out[i*8 + 4] != in[i*8 + 4] + a - || out[i*8 + 5] != in[i*8 + 5] + a - || out[i*8 + 6] != in[i*8 + 6] + a - || out[i*8 + 7] != in[i*8 + 7] + a) - abort (); + do + { + if (out[i*8] != in[i*8] + a + || out[i*8 + 1] != in[i*8 + 1] + a + || out[i*8 + 2] != in[i*8 + 2] + a + || out[i*8 + 3] != in[i*8 + 3] + a + || out[i*8 + 4] != in[i*8 + 4] + a + || out[i*8 + 5] != in[i*8 + 5] + a + || out[i*8 + 6] != in[i*8 + 6] + a + || out[i*8 + 7] != in[i*8 + 7] + a) + abort (); - if (arr[i] = i) - a = i; - else - a = 2; + if (arr[i] = i) + a = i; + else + a = 2; + i++; + } + while (i < N); } return 0; @@ -66,4 +77,3 @@ int main (void) } /* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1" } } */ - diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index 32ce1e3af40..293836cdc53 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -278,7 +278,7 @@ cancel_thread (vec *path, const char *reason = NULL) if (dump_file && (dump_flags & TDF_DETAILS)) { if (reason) - fprintf (dump_file, "%s:\n", reason); + fprintf (dump_file, "%s: ", reason); dump_jump_thread_path (dump_file, *path, false); fprintf (dump_file, "\n"); @@ -2771,6 +2771,7 @@ jt_path_registry::cancel_invalid_paths (vec &path) bool seen_latch = false; int loops_crossed = 0; bool crossed_latch = false; + bool crossed_loop_header = false; // Use ->dest here instead of ->src to ignore the first block. The // first block is allowed to be in a different loop, since it'll be // redirected. See similar comment in profitable_path_p: "we don't @@ -2804,6 +2805,14 @@ jt_path_registry::cancel_invalid_paths (vec &path) ++loops_crossed; } + // ?? Avoid threading through loop headers that remain in the + // loop, as such threadings tend to create sub-loops which + // _might_ be OK ??. + if (e->dest->loop_father->header == e->dest + && !flow_loop_nested_p (exit->dest->loop_father, + e->dest->loop_father)) + crossed_loop_header = true; + if (flag_checking && !m_backedge_threads) gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0); } @@ -2829,6 +2838,21 @@ jt_path_registry::cancel_invalid_paths (vec &path) cancel_thread (&path, "Path crosses loops"); return true; } + // The path should either start and end in the same loop or exit the + // loop it starts in but never enter a loop. This also catches + // creating irreducible loops, not only rotation. + if (entry->src->loop_father != exit->dest->loop_father + && !flow_loop_nested_p (exit->src->loop_father, + entry->dest->loop_father)) + { + cancel_thread (&path, "Path rotates loop"); + return true; + } + if (crossed_loop_header) + { + cancel_thread (&path, "Path crosses loop header but does not exit it"); + return true; + } return false; } diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-5.c b/libgomp/testsuite/libgomp.graphite/force-parallel-5.c index b83ca79dfae..de31d6436f5 100644 --- a/libgomp/testsuite/libgomp.graphite/force-parallel-5.c +++ b/libgomp/testsuite/libgomp.graphite/force-parallel-5.c @@ -31,6 +31,6 @@ int main(void) } /* Check that parallel code generation part make the right answer. */ -/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 1 "graphite" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "2 loops carried no dependency" 1 "graphite" } } */ /* { dg-final { scan-tree-dump-times "loopfn.0" 4 "optimized" } } */ /* { dg-final { scan-tree-dump-times "loopfn.1" 4 "optimized" } } */