diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index c3b8820e213..a721940a617 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -2262,6 +2262,7 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node)) != CODE_FOR_nothing)) { + /* Start value used for the outer iterator below: the adjusted n1 stored in FD when there is one, otherwise the n1 of loop i - 1. */ tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1; tree itype = TREE_TYPE (fd->loops[i].v); tree min_inner_iterations = fd->min_inner_iterations; tree factor = fd->factor; @@ -2384,7 +2385,7 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, *gsi = gsi_after_labels (e->dest); t = fold_convert (itype, c); t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step); - t = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t); + /* Outer iterator value is OUTER_N1 + c * outer step. */ t = fold_build2 (PLUS_EXPR, itype, outer_n1, t); t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true); diff --git a/gcc/omp-general.c b/gcc/omp-general.c index c6878cfec66..b2ce4083b27 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -214,6 +214,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, fd->simd_schedule = false; fd->min_inner_iterations = NULL_TREE; fd->factor = NULL_TREE; + /* No adjusted outer n1 by default; it is only set further down when the outer start value gets moved. */ fd->adjn1 = NULL_TREE; collapse_iter = NULL; collapse_count = NULL; @@ -508,7 +509,10 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, continue; if (single_nonrect == -1 || (loop->m1 && TREE_CODE (loop->m1) != INTEGER_CST) - || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST)) + || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST) + || TREE_CODE (loop->n1) != INTEGER_CST + || TREE_CODE (loop->n2) != INTEGER_CST + || TREE_CODE (loop->step) != INTEGER_CST) /* Also give up on a constant count when n1, n2 or step of this loop is not an INTEGER_CST. */ { count = NULL_TREE; continue; @@ -574,12 +578,129 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, else if (t && t2 && integer_zerop (t) && integer_zerop (t2)) /* No iterations of the inner loop. 
count will be set to zero cst below. */; - else + else if (TYPE_UNSIGNED (itype) + || t == NULL_TREE + || t2 == NULL_TREE + || TREE_CODE (t) != INTEGER_CST + || TREE_CODE (t2) != INTEGER_CST) /* Unsigned iterator types, and T or T2 that are missing or not INTEGER_CST, still take the punt path. */ { /* Punt (for now). */ count = NULL_TREE; continue; } + else + { + /* Some iterations of the outer loop have zero iterations + of the inner loop, while others have at least one. + In this case, we need to adjust one of those outer + loop bounds. If ADJ_FIRST, we need to adjust outer n1 + (first), otherwise outer n2 (last). */ + bool adj_first = integer_zerop (t); + tree n1 = fold_convert (itype, loop->n1); + tree n2 = fold_convert (itype, loop->n2); + tree m1 = loop->m1 ? fold_convert (itype, loop->m1) + : build_zero_cst (itype); + tree m2 = loop->m2 ? fold_convert (itype, loop->m2) + : build_zero_cst (itype); + /* T = (n1 - n2) / (m2 - m1), then reduced by (T - first) % ostep. Presumably this is the outer iterator value closest to where the two inner bounds meet that FIRST reaches in whole OSTEP steps; TODO confirm. */ t = fold_binary (MINUS_EXPR, itype, n1, n2); + t2 = fold_binary (MINUS_EXPR, itype, m2, m1); + t = fold_binary (TRUNC_DIV_EXPR, itype, t, t2); + t2 = fold_binary (MINUS_EXPR, itype, t, first); + t2 = fold_binary (TRUNC_MOD_EXPR, itype, t2, ostep); + t = fold_binary (MINUS_EXPR, itype, t, t2); + /* Inner bounds n1 + m1 * T and n2 + m2 * T, i.e. evaluated at outer value T. */ tree n1cur + = fold_binary (PLUS_EXPR, itype, n1, + fold_binary (MULT_EXPR, itype, m1, t)); + tree n2cur + = fold_binary (PLUS_EXPR, itype, n2, + fold_binary (MULT_EXPR, itype, m2, t)); + /* T2 now holds the folded inner loop condition at outer value T. */ t2 = fold_binary (loop->cond_code, boolean_type_node, + n1cur, n2cur); + /* T3 and T4 are the amounts by which the inner bounds change per outer step. */ tree t3 = fold_binary (MULT_EXPR, itype, m1, ostep); + tree t4 = fold_binary (MULT_EXPR, itype, m2, ostep); + tree diff; + if (adj_first) + { + tree new_first; + if (integer_nonzerop (t2)) + { + /* The inner condition holds at T, so T becomes the new first outer value. */ new_first = t; + n1first = n1cur; + n2first = n2cur; + if (flag_checking) + { + /* Check that one outer step before T the inner condition folds to zero. */ t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_zerop (t3)); + } + } + else + { + /* Otherwise the new first outer value is T + ostep, with the inner bounds advanced by one outer step. */ t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); + new_first = fold_binary 
(PLUS_EXPR, itype, t, ostep); + n1first = t3; + n2first = t4; + if (flag_checking) + { + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_nonzerop (t3)); + } + } + diff = fold_binary (MINUS_EXPR, itype, new_first, first); + first = new_first; + /* Record the adjusted outer start value in FD. */ fd->adjn1 = first; + } + else + { + tree new_last; + if (integer_zerop (t2)) + { + /* The inner condition does not hold at T, so the new last outer value is T - ostep. */ t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); + new_last = fold_binary (MINUS_EXPR, itype, t, ostep); + n1last = t3; + n2last = t4; + if (flag_checking) + { + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_nonzerop (t3)); + } + } + else + { + new_last = t; + n1last = n1cur; + n2last = n2cur; + if (flag_checking) + { + /* Check that one outer step after T the inner condition folds to zero. */ t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_zerop (t3)); + } + } + diff = fold_binary (MINUS_EXPR, itype, last, new_last); + } + /* NOTE(review): this else block is only entered when TYPE_UNSIGNED (itype) is false (see the punt condition above), so the negating variant below looks unreachable here; confirm. */ if (TYPE_UNSIGNED (itype) + && single_nonrect_cond_code == GT_EXPR) + diff = fold_binary (TRUNC_DIV_EXPR, itype, + fold_unary (NEGATE_EXPR, itype, diff), + fold_unary (NEGATE_EXPR, itype, + ostep)); + else + diff = fold_binary (TRUNC_DIV_EXPR, itype, diff, ostep); + diff = fold_convert (long_long_unsigned_type_node, diff); + /* Take the DIFF / OSTEP outer steps removed by the adjustment out of single_nonrect_count. */ single_nonrect_count + = fold_binary (MINUS_EXPR, long_long_unsigned_type_node, + single_nonrect_count, diff); + t = NULL_TREE; + } } else t = fold_binary (loop->cond_code, boolean_type_node, @@ -715,10 +836,11 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, *collapse_count = fold_convert_loc (loc, iter_type, count); if (fd->min_inner_iterations && fd->factor) { - t = make_tree_vec (3); + /* Four elements: count, min_inner_iterations, factor and adjn1. */ t = make_tree_vec (4); TREE_VEC_ELT (t, 0) = *collapse_count; TREE_VEC_ELT (t, 1) = fd->min_inner_iterations; TREE_VEC_ELT (t, 2) = fd->factor; + TREE_VEC_ELT (t, 3) = fd->adjn1; *collapse_count = t; } } @@ 
-736,6 +858,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, gcc_assert (fd->non_rect); fd->min_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1); fd->factor = TREE_VEC_ELT (fd->loop.n2, 2); + /* Element 3 of the vector carries the adjusted outer n1 (it may be NULL_TREE). */ fd->adjn1 = TREE_VEC_ELT (fd->loop.n2, 3); fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0); } fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); diff --git a/gcc/omp-general.h b/gcc/omp-general.h index ec0f2a4becb..2da4d14b310 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -85,6 +85,8 @@ struct omp_for_data outer iterator, depending on which results in fewer iterations. */ tree factor; /* (m2 - m1) * outer_step / inner_step. */ + /* Adjusted n1 of the outer loop in such loop nests (if needed); omp_extract_for_data initializes it to NULL_TREE. */ + tree adjn1; }; #define OACC_FN_ATTRIB "oacc function" diff --git a/libgomp/testsuite/libgomp.c/loop-21.c b/libgomp/testsuite/libgomp.c/loop-21.c new file mode 100644 index 00000000000..1baf13d84db --- /dev/null +++ b/libgomp/testsuite/libgomp.c/loop-21.c @@ -0,0 +1,230 @@ +/* { dg-do run } */ /* Each loop nest below is first run serially to mark cells of k with 1, then run twice under omp parallel for collapse(2), once with literal bounds and once with the bounds read from the volatile variables a through h, and finally run serially again to check that every marked cell reached 3 and to reset it to 0. After each parallel run the test checks niters, x and, where the check is not commented out, the final values of i and j. */ + +extern void abort (void); + +int x, i, j; +volatile int a, b, c, d, e, f, g, h; +int k[13][27]; + +int +main () +{ + int niters; + /* Nest 1: outer i ascending, inner j descending from 3 * i while j > 2 * i. */ for (i = -4; i < 8; i++) + for (j = 3 * i; j > 2 * i; j--) + k[i + 5][j + 5] = 1; + a = -4; b = 8; c = 1; d = 3; e = 0; f = 2; g = 0; h = -1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = -4; i < 8; i++) + for (j = 3 * i; j > 2 * i; j--) + { + if (i < -4 || i >= 8 || j > 3 * i || j <= i * 2 || k[i + 5][j + 5] != 1) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 8 || j != 14 || x != 7183 || niters != 28) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j > g + i * f; j += h) + { + if (i < -4 || i >= 8 || j > 3 * i || j <= i * 2 || k[i + 5][j + 5] != 2) + abort 
(); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 8 || j != 14 || x != 7183 || niters != 28) + abort (); + for (int i = -4; i < 8; i++) + for (int j = 3 * i; j > 2 * i; j--) + if (k[i + 5][j + 5] == 3) + k[i + 5][j + 5] = 0; + else + abort (); + /* Nest 2: outer i ascending, inner j descending in steps of 2 from -2 * i + 3 while j > -3. */ for (i = -2; i < 4; i++) + for (j = -2 * i + 3; j > -3; j -= 2) + k[i + 5][j + 5] = 1; + a = -2; b = 4; c = 1; d = -2; e = 3; f = 0; g = -3; h = -2; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = -2; i < 4; i++) + for (j = -2 * i + 3; j > -3; j -= 2) + { + if (i < -2 || i >= 4 || j <= -3 || j > -2 * i + 3 || k[i + 5][j + 5] != 1) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/* i != 4 || j != -3 || */x != 3071 || niters != 15) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j > g + i * f; j += h) + { + if (i < -2 || i >= 4 || j <= -3 || j > -2 * i + 3 || k[i + 5][j + 5] != 2) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 4 || j != -3 || */x != 3071 || niters != 15) + abort (); + for (i = -2; i < 4; i++) + for (j = -2 * i + 3; j > -3; j -= 2) + if (k[i + 5][j + 5] == 3) + k[i + 5][j + 5] = 0; + else + abort (); + /* Nest 3: outer i descending, inner j descending from -2 * i + 7 while j > 2 * i + 1. */ for (i = 3; i > -3; i--) + for (j = -2 * i + 7; j > 2 * i + 1; j--) + k[i + 5][j + 5] = 1; + a = 3; b = -3; c = -1; d = -2; e = 7; f = 2; g = 1; h = -1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 3; i > -3; i--) + for (j = -2 * i + 7; j > 2 * i + 1; j--) + { + if (i <= -3 || i > 3 || j <= 2 * i + 1 || j > -2 * i + 7 || k[i + 5][j + 5] != 1) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != -3 || j != -3 || x != -1026 || niters != 32) + abort (); + 
niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i > b; i += c) + for (j = d * i + e; j > g + i * f; j += h) + { + if (i <= -3 || i > 3 || j <= 2 * i + 1 || j > -2 * i + 7 || k[i + 5][j + 5] != 2) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != -3 || j != -3 || x != -1026 || niters != 32) + abort (); + for (i = 3; i > -3; i--) + for (j = -2 * i + 7; j > 2 * i + 1; j--) + if (k[i + 5][j + 5] == 3) + k[i + 5][j + 5] = 0; + else + abort (); + /* Nest 4: outer i descending, inner j descending from 2 * i + 7 while j > -2 * i + 1. */ for (i = 3; i > -3; i--) + for (j = 2 * i + 7; j > -2 * i + 1; j--) + k[i + 5][j + 5] = 1; + a = 3; b = -3; c = -1; d = 2; e = 7; f = -2; g = 1; h = -1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 3; i > -3; i--) + for (j = 2 * i + 7; j > -2 * i + 1; j--) + { + if (i <= -3 || i > 3 || j <= -2 * i + 1 || j > 2 * i + 7 || k[i + 5][j + 5] != 1) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != -3 || j != 3 || */x != -1020 || niters != 50) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i > b; i += c) + for (j = d * i + e; j > g + i * f; j += h) + { + if (i <= -3 || i > 3 || j <= -2 * i + 1 || j > 2 * i + 7 || k[i + 5][j + 5] != 2) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != -3 || j != 3 || */x != -1020 || niters != 50) + abort (); + for (i = 3; i > -3; i--) + for (j = 2 * i + 7; j > -2 * i + 1; j--) + if (k[i + 5][j + 5] == 3) + k[i + 5][j + 5] = 0; + else + abort (); + /* Nest 5: outer i descending, inner j ascending from 2 * i + 7 while j <= -2 * i + 1. */ for (i = 6; i > -6; i--) + for (j = 2 * i + 7; j <= -2 * i + 1; j++) + k[i + 5][j + 5] = 1; + a = 6; b = -6; c = -1; d = 2; e = 7; f = -2; g = 2; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) 
reduction(+:niters) + for (i = 6; i > -6; i--) + for (j = 2 * i + 7; j <= -2 * i + 1; j++) + { + if (i <= -6 || i > 6 || j < 2 * i + 7 || j >= -2 * i + 2 || k[i + 5][j + 5] != 1) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != -6 || j != 12 || x != -5109 || niters != 36) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i > b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i <= -6 || i > 6 || j < 2 * i + 7 || j >= -2 * i + 2 || k[i + 5][j + 5] != 2) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != -6 || j != 12 || x != -5109 || niters != 36) + abort (); + for (i = 6; i > -6; i--) + for (j = 2 * i + 7; j <= -2 * i + 1; j++) + if (k[i + 5][j + 5] == 3) + k[i + 5][j + 5] = 0; + else + abort (); + /* Nest 6: outer i descending in steps of 2, inner j ascending from -2 * i + 7 while j <= 2 * i + 1. */ for (i = 6; i > -6; i -= 2) + for (j = -2 * i + 7; j <= 2 * i + 1; j++) + k[i + 5][j + 5] = 1; + a = 6; b = -6; c = -2; d = -2; e = 7; f = 2; g = 2; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 6; i > -6; i -= 2) + for (j = -2 * i + 7; j <= 2 * i + 1; j++) + { + if (i <= -6 || i > 6 || j < -2 * i + 7 || j >= 2 * i + 2 || k[i + 5][j + 5] != 1) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != -6 || j != 15 || */x != 2053 || niters != 33) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i > b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i <= -6 || i > 6 || j < -2 * i + 7 || j >= 2 * i + 2 || k[i + 5][j + 5] != 2) + abort (); + k[i + 5][j + 5]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != -6 || j != 15 || */x != 2053 || niters != 33) + abort (); + for (i = 6; i > -6; i -= 2) + for (j = -2 * i + 7; j <= 2 * i + 1; 
j++) + if (k[i + 5][j + 5] == 3) + k[i + 5][j + 5] = 0; + else + abort (); + return 0; +}