[committed] openmp: Non-rectangular loop support for non-composite worksharing loops and distribute

Message ID 20200627105157.GB8462@tucnak
State New
Headers show
Series
  • [committed] openmp: Non-rectangular loop support for non-composite worksharing loops and distribute
Related show

Commit Message

Jonathan Wakely via Gcc-patches June 27, 2020, 10:51 a.m.
Hi!

This implements the fallback mentioned in
https://gcc.gnu.org/pipermail/gcc/2020-June/232874.html
Special cases for triangular loops etc. to follow later, also composite
constructs not supported yet (need to check the passing of temporaries around) 
and lastprivate might not give the same answers as a serial loop if the last 
innermost body iteration isn't the last one for some of the outer loops
(that will need to be solved separately together with rectangular loops that have no
innermost body iterations, but some of the outer loops actually iterate).
Also, simd needs work. 

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2020-06-27  Jakub Jelinek  <jakub@redhat.com>

	* omp-general.h (struct omp_for_data_loop): Add non_rect_referenced
	member, move outer member.
	(struct omp_for_data): Add first_nonrect and last_nonrect members.
	* omp-general.c (omp_extract_for_data): Initialize first_nonrect,
	last_nonrect and non_rect_referenced members.
	* omp-expand.c (expand_omp_for_init_counts): Handle non-rectangular
	loops.
	(expand_omp_for_init_vars): Add nonrect_bounds parameter.  Handle
	non-rectangular loops.
	(extract_omp_for_update_vars): Likewise.
	(expand_omp_for_generic, expand_omp_for_static_nochunk,
	expand_omp_for_static_chunk, expand_omp_simd,
	expand_omp_taskloop_for_outer, expand_omp_taskloop_for_inner): Adjust
	expand_omp_for_init_vars and extract_omp_for_update_vars callers.
	(expand_omp_for): Don't sorry on non-composite worksharing-loop or
	distribute.

	* testsuite/libgomp.c/loop-17.c: New test.
	* testsuite/libgomp.c/loop-18.c: New test.



	Jakub

Patch

--- gcc/omp-general.h.jj	2020-06-23 15:58:46.608022000 +0200
+++ gcc/omp-general.h	2020-06-24 15:08:07.283089199 +0200
@@ -47,13 +47,16 @@  enum oacc_loop_flags {
    or for non-rectangular loops:
    for (V = M1 * W + N1; V cond M2 * W + N2; V += STEP;
    where W is V of the OUTER-th loop (e.g. for OUTER 1 it is the
-   the index of the immediately surrounding loop).  */
+   index of the immediately surrounding loop).
+   NON_RECT_REFERENCED is true for loops referenced by loops
+   with non-NULL M1 or M2.  */
 
 struct omp_for_data_loop
 {
   tree v, n1, n2, step, m1, m2;
-  int outer;
   enum tree_code cond_code;
+  int outer;
+  bool non_rect_referenced;
 };
 
 /* A structure describing the main elements of a parallel loop.  */
@@ -67,6 +70,7 @@  struct omp_for_data
   tree tiling;  /* Tiling values (if non null).  */
   int collapse;  /* Collapsed loops, 1 for a non-collapsed loop.  */
   int ordered;
+  int first_nonrect, last_nonrect;
   bool have_nowait, have_ordered, simd_schedule, have_reductemp;
   bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp;
   bool non_rect;
--- gcc/omp-general.c.jj	2020-06-23 15:58:46.594022202 +0200
+++ gcc/omp-general.c	2020-06-24 16:58:37.888877351 +0200
@@ -206,6 +206,8 @@  omp_extract_for_data (gomp_for *for_stmt
   fd->tiling = NULL_TREE;
   fd->collapse = 1;
   fd->ordered = 0;
+  fd->first_nonrect = -1;
+  fd->last_nonrect = -1;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->sched_modifiers = 0;
   fd->chunk_size = NULL_TREE;
@@ -372,18 +374,24 @@  omp_extract_for_data (gomp_for *for_stmt
       loop->m1 = NULL_TREE;
       loop->m2 = NULL_TREE;
       loop->outer = 0;
+      loop->non_rect_referenced = false;
       if (TREE_CODE (loop->n1) == TREE_VEC)
 	{
 	  for (int j = i - 1; j >= 0; j--)
 	    if (TREE_VEC_ELT (loop->n1, 0) == gimple_omp_for_index (for_stmt, j))
 	      {
 		loop->outer = i - j;
+		if (loops != NULL)
+		  loops[j].non_rect_referenced = true;
+		if (fd->first_nonrect == -1 || fd->first_nonrect > j)
+		  fd->first_nonrect = j;
 		break;
 	      }
 	  gcc_assert (loop->outer);
 	  loop->m1 = TREE_VEC_ELT (loop->n1, 1);
 	  loop->n1 = TREE_VEC_ELT (loop->n1, 2);
 	  fd->non_rect = true;
+	  fd->last_nonrect = i;
 	}
 
       loop->cond_code = gimple_omp_for_cond (for_stmt, i);
@@ -401,12 +409,17 @@  omp_extract_for_data (gomp_for *for_stmt
 	      if (TREE_VEC_ELT (loop->n2, 0) == gimple_omp_for_index (for_stmt, j))
 		{
 		  loop->outer = i - j;
+		  if (loops != NULL)
+		    loops[j].non_rect_referenced = true;
+		  if (fd->first_nonrect == -1 || fd->first_nonrect > j)
+		    fd->first_nonrect = j;
 		  break;
 		}
 	  gcc_assert (loop->outer);
 	  loop->m2 = TREE_VEC_ELT (loop->n2, 1);
 	  loop->n2 = TREE_VEC_ELT (loop->n2, 2);
 	  fd->non_rect = true;
+	  fd->last_nonrect = i;
 	}
 
       t = gimple_omp_for_incr (for_stmt, i);
--- gcc/omp-expand.c.jj	2020-06-23 15:58:46.593022216 +0200
+++ gcc/omp-expand.c	2020-06-26 21:38:16.299301772 +0200
@@ -1734,7 +1734,39 @@  expand_oacc_collapse_vars (const struct
 	count = 0;
    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
    of the combined loop constructs, just initialize COUNTS array
-   from the _looptemp_ clauses.  */
+   from the _looptemp_ clauses.  For loop nests with non-rectangular
+   loops, do this only for the rectangular loops.  Then pick
+   the loops which reference outer vars in their bound expressions
+   and the loops which they refer to and for this sub-nest compute
+   number of iterations.  For triangular loops use Faulhaber's formula
+   (TBD.), otherwise as a fallback, compute by iterating the loops.
+   If e.g. the sub-nest is
+	for (I = N11; I COND1 N12; I += STEP1)
+	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
+	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
+   do:
+	COUNT = 0;
+	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
+	for (tmpj = M21 * tmpi + N21;
+	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
+	  {
+	    int tmpk1 = M31 * tmpj + N31;
+	    int tmpk2 = M32 * tmpj + N32;
+	    if (tmpk1 COND3 tmpk2)
+	      {
+		if (COND3 is <)
+		  adj = STEP3 - 1;
+		else
+		  adj = STEP3 + 1;
+		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
+	      }
+	  }
+   and finally multiply the counts of the rectangular loops not
+   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
+   store number of iterations of the loops from fd->first_nonrect
+   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
+   by the counts of rectangular loops not referenced in any non-rectangular
+   loops sandwiched in between those.  */
 
 /* NOTE: It *could* be better to moosh all of the BBs together,
    creating one larger BB with all the computation and the unexpected
@@ -1813,12 +1845,23 @@  expand_omp_for_init_counts (struct omp_f
 	  break;
 	}
     }
+  bool rect_count_seen = false;
   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
     {
       tree itype = TREE_TYPE (fd->loops[i].v);
 
       if (i >= fd->collapse && counts[i])
 	continue;
+      if (fd->non_rect)
+	{
+	  /* Skip loops that use outer iterators in their expressions
+	     during this phase.  */
+	  if (fd->loops[i].m1 || fd->loops[i].m2)
+	    {
+	      counts[i] = build_zero_cst (type);
+	      continue;
+	    }
+	}
       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
 				fold_convert (itype, fd->loops[i].n1),
@@ -1914,13 +1957,197 @@  expand_omp_for_init_counts (struct omp_f
 	}
       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
 	{
-	  if (i == 0)
-	    t = counts[0];
+	  if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
+	    continue;
+	  if (!rect_count_seen)
+	    {
+	      t = counts[i];
+	      rect_count_seen = true;
+	    }
 	  else
 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
 	}
     }
+  if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
+    {
+      gcc_assert (fd->last_nonrect != -1);
+
+      /* Fallback implementation.  Evaluate the loops with m1/m2
+	 non-NULL as well as their outer loops at runtime using temporaries
+	 instead of the original iteration variables, and in the
+	 body just bump the counter.  */
+      counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
+      expand_omp_build_assign (gsi, counts[fd->last_nonrect],
+			       build_zero_cst (type));
+      gimple_stmt_iterator gsi2 = *gsi;
+      gsi_prev (&gsi2);
+      e = split_block (entry_bb, gsi_stmt (gsi2));
+      e = split_block (e->dest, (gimple *) NULL);
+      basic_block cur_bb = e->src;
+      basic_block next_bb = e->dest;
+      entry_bb = e->dest;
+      *gsi = gsi_after_labels (entry_bb);
+
+      tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
+      memset (vs, 0, fd->last_nonrect * sizeof (tree));
+
+      for (i = 0; i <= fd->last_nonrect; i++)
+	{
+	  if (fd->loops[i].m1 == NULL_TREE
+	      && fd->loops[i].m2 == NULL_TREE
+	      && !fd->loops[i].non_rect_referenced)
+	    continue;
+
+	  tree itype = TREE_TYPE (fd->loops[i].v);
+
+	  gsi2 = gsi_after_labels (cur_bb);
+	  tree n1, n2;
+	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+	  if (fd->loops[i].m1)
+	    {
+	      n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
+	      n1 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer],
+				n1);
+	      n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
+	    }
+	  else
+	    n1 = t;
+	  n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
+					 true, GSI_SAME_STMT);
+	  if (i < fd->last_nonrect)
+	    {
+	      vs[i] = create_tmp_reg (itype, ".it");
+	      expand_omp_build_assign (&gsi2, vs[i], n1);
+	    }
+	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+	  if (fd->loops[i].m2)
+	    {
+	      n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+	      n2 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer],
+				n2);
+	      n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
+	    }
+	  else
+	    n2 = t;
+	  n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
+					 true, GSI_SAME_STMT);
+	  if (i == fd->last_nonrect)
+	    {
+	      gcond *cond_stmt
+		= gimple_build_cond (fd->loops[i].cond_code, n1, n2,
+				     NULL_TREE, NULL_TREE);
+	      gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+	      e = split_block (cur_bb, cond_stmt);
+	      e->flags = EDGE_TRUE_VALUE;
+	      ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
+	      e->probability = profile_probability::likely ().guessed ();
+	      ne->probability = e->probability.invert ();
+	      gsi2 = gsi_after_labels (e->dest);
+
+	      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
+					 ? -1 : 1));
+	      t = fold_build2 (PLUS_EXPR, itype,
+			       fold_convert (itype, fd->loops[i].step), t);
+	      t = fold_build2 (PLUS_EXPR, itype, t, n2);
+	      t = fold_build2 (MINUS_EXPR, itype, t, n1);
+	      tree step = fold_convert (itype, fd->loops[i].step);
+	      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
+		t = fold_build2 (TRUNC_DIV_EXPR, itype,
+				 fold_build1 (NEGATE_EXPR, itype, t),
+				 fold_build1 (NEGATE_EXPR, itype, step));
+	      else
+		t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+	      t = fold_convert (type, t);
+	      t = fold_build2 (PLUS_EXPR, type, counts[fd->last_nonrect], t);
+	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+					    true, GSI_SAME_STMT);
+	      expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
+	      e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
+	      set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
+	      break;
+	    }
+	  e = split_block (cur_bb, last_stmt (cur_bb));
+
+	  basic_block new_cur_bb = create_empty_bb (cur_bb);
+	  add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
+
+	  gsi2 = gsi_after_labels (e->dest);
+	  tree step = fold_convert (itype, unshare_expr (fd->loops[i].step));
+	  t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
+	  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+					true, GSI_SAME_STMT);
+	  expand_omp_build_assign (&gsi2, vs[i], t);
+
+	  ne = split_block (e->dest, last_stmt (e->dest));
+	  gsi2 = gsi_after_labels (ne->dest);
+
+	  gcond *cond_stmt
+	    = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
+				 NULL_TREE, NULL_TREE);
+	  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+	  edge e3, e4;
+	  if (next_bb == entry_bb)
+	    {
+	      e3 = find_edge (ne->dest, next_bb);
+	      e3->flags = EDGE_FALSE_VALUE;
+	    }
+	  else
+	    e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
+	  e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
+	  e4->probability = profile_probability::likely ().guessed ();
+	  e3->probability = e4->probability.invert ();
+	  basic_block esrc = e->src;
+	  make_edge (e->src, ne->dest, EDGE_FALLTHRU);
+	  cur_bb = new_cur_bb;
+	  basic_block latch_bb = next_bb;
+	  next_bb = e->dest;
+	  remove_edge (e);
+	  set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
+	  set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
+	  set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
+	}
+      t = NULL_TREE;
+      for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
+	if (!fd->loops[i].non_rect_referenced
+	    && fd->loops[i].m1 == NULL_TREE
+	    && fd->loops[i].m2 == NULL_TREE)
+	  {
+	    if (t == NULL_TREE)
+	      t = counts[i];
+	    else
+	      t = fold_build2 (MULT_EXPR, type, t, counts[i]);
+	  }
+      if (t)
+	{
+	  t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
+	  expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
+	}
+      if (!rect_count_seen)
+	t = counts[fd->last_nonrect];
+      else
+	t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
+			 counts[fd->last_nonrect]);
+      expand_omp_build_assign (gsi, fd->loop.n2, t);
+    }
+  else if (fd->non_rect)
+    {
+      tree t = fd->loop.n2;
+      gcc_assert (TREE_CODE (t) == INTEGER_CST);
+      int non_rect_referenced = 0, non_rect = 0;
+      for (i = 0; i < fd->collapse; i++)
+	{
+	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
+	      && !integer_zerop (counts[i]))
+	    t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
+	  if (fd->loops[i].non_rect_referenced)
+	    non_rect_referenced++;
+	  if (fd->loops[i].m1 || fd->loops[i].m2)
+	    non_rect++;
+	}
+      gcc_assert (non_rect == 1 && non_rect_referenced == 1);
+      counts[fd->last_nonrect] = t;
+    }
 }
 
 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
@@ -1933,11 +2160,43 @@  expand_omp_for_init_counts (struct omp_f
    if this loop doesn't have an inner loop construct combined with it.
    If it does have an inner loop construct combined with it and the
    iteration count isn't known constant, store values from counts array
-   into its _looptemp_ temporaries instead.  */
+   into its _looptemp_ temporaries instead.
+   For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
+   inclusive), use the count of all those loops together, and either
+   find quadratic etc. equation roots (TBD), or as a fallback, do:
+	COUNT = 0;
+	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
+	for (tmpj = M21 * tmpi + N21;
+	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
+	  {
+	    int tmpk1 = M31 * tmpj + N31;
+	    int tmpk2 = M32 * tmpj + N32;
+	    if (tmpk1 COND3 tmpk2)
+	      {
+		if (COND3 is <)
+		  adj = STEP3 - 1;
+		else
+		  adj = STEP3 + 1;
+		int temp = (adj + tmpk2 - tmpk1) / STEP3;
+		if (COUNT + temp > T)
+		  {
+		    V1 = tmpi;
+		    V2 = tmpj;
+		    V3 = tmpk1 + (T - COUNT) * STEP3;
+		    goto done;
+		  }
+		else
+		  COUNT += temp;
+	      }
+	  }
+	done:;
+   but for optional innermost or outermost rectangular loops that aren't
+   referenced by other loop expressions keep doing the division/modulo.  */
 
 static void
 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
-			  tree *counts, gimple *inner_stmt, tree startvar)
+			  tree *counts, tree *nonrect_bounds,
+			  gimple *inner_stmt, tree startvar)
 {
   int i;
   if (gimple_omp_for_combined_p (fd->for_stmt))
@@ -1984,25 +2243,237 @@  expand_omp_for_init_vars (struct omp_for
       itype = vtype;
       if (POINTER_TYPE_P (vtype))
 	itype = signed_type_for (vtype);
-      if (i != 0)
+      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
       else
 	t = tem;
-      t = fold_convert (itype, t);
-      t = fold_build2 (MULT_EXPR, itype, t,
-		       fold_convert (itype, fd->loops[i].step));
-      if (POINTER_TYPE_P (vtype))
-	t = fold_build_pointer_plus (fd->loops[i].n1, t);
+      if (i == fd->last_nonrect)
+	{
+	  /* Fallback implementation.  Evaluate the loops in between
+	     (inclusive) fd->first_nonrect and fd->last_nonrect at
	     runtime using temporaries instead of the original iteration
+	     variables, in the body just bump the counter and compare
+	     with the desired value.  */
+	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
+					false, GSI_CONTINUE_LINKING);
+	  tree stopval = t;
+	  tree idx = create_tmp_reg (type, ".count");
+	  expand_omp_build_assign (gsi, idx,
+				   build_zero_cst (type), true);
+	  gimple_stmt_iterator gsi2 = *gsi;
+	  basic_block entry_bb = gsi_bb (gsi2);
+	  edge e = split_block (entry_bb, gsi_stmt (gsi2));
+	  e = split_block (e->dest, (gimple *) NULL);
+	  basic_block dom_bb = NULL;
+	  basic_block cur_bb = e->src;
+	  basic_block next_bb = e->dest;
+	  entry_bb = e->dest;
+	  *gsi = gsi_after_labels (entry_bb);
+
+	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
+	  tree n1 = NULL_TREE, n2 = NULL_TREE;
+	  memset (vs, 0, fd->last_nonrect * sizeof (tree));
+
+	  for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
+	    {
+	      tree itype = TREE_TYPE (fd->loops[j].v);
+	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
+			     && fd->loops[j].m2 == NULL_TREE
+			     && !fd->loops[j].non_rect_referenced);
+	      gsi2 = gsi_after_labels (cur_bb);
+	      t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
+	      if (fd->loops[j].m1)
+		{
+		  n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
+		  n1 = fold_build2 (MULT_EXPR, itype,
+				    vs[j - fd->loops[j].outer], n1);
+		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
+		}
+	      else if (rect_p)
+		n1 = build_zero_cst (type);
+	      else
+		n1 = t;
+	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
+					     true, GSI_SAME_STMT);
+	      if (j < fd->last_nonrect)
+		{
+		  vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
+		  expand_omp_build_assign (&gsi2, vs[j], n1);
+		}
+	      t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
+	      if (fd->loops[j].m2)
+		{
+		  n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
+		  n2 = fold_build2 (MULT_EXPR, itype,
+				    vs[j - fd->loops[j].outer], n2);
+		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
+		}
+	      else if (rect_p)
+		n2 = counts[j];
+	      else
+		n2 = t;
+	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
+					     true, GSI_SAME_STMT);
+	      if (j == fd->last_nonrect)
+		{
+		  gcond *cond_stmt
+		    = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
+					 NULL_TREE, NULL_TREE);
+		  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+		  e = split_block (cur_bb, cond_stmt);
+		  e->flags = EDGE_TRUE_VALUE;
+		  edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
+		  e->probability = profile_probability::likely ().guessed ();
+		  ne->probability = e->probability.invert ();
+		  gsi2 = gsi_after_labels (e->dest);
+
+		  t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
+					     ? -1 : 1));
+		  t = fold_build2 (PLUS_EXPR, itype,
+				   fold_convert (itype, fd->loops[j].step), t);
+		  t = fold_build2 (PLUS_EXPR, itype, t, n2);
+		  t = fold_build2 (MINUS_EXPR, itype, t, n1);
+		  tree step = fold_convert (itype, fd->loops[j].step);
+		  if (TYPE_UNSIGNED (itype)
+		      && fd->loops[j].cond_code == GT_EXPR)
+		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
+				     fold_build1 (NEGATE_EXPR, itype, t),
+				     fold_build1 (NEGATE_EXPR, itype, step));
+		  else
+		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+		  t = fold_convert (type, t);
+		  t = fold_build2 (PLUS_EXPR, type, idx, t);
+		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+						true, GSI_SAME_STMT);
+		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
+		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
+		  cond_stmt
+		    = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
+					 NULL_TREE);
+		  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+		  e = split_block (gsi_bb (gsi2), cond_stmt);
+		  e->flags = EDGE_TRUE_VALUE;
+		  e->probability = profile_probability::likely ().guessed ();
+		  ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
+		  ne->probability = e->probability.invert ();
+		  gsi2 = gsi_after_labels (e->dest);
+		  expand_omp_build_assign (&gsi2, idx, t);
+		  set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
+		  break;
+		}
+	      e = split_block (cur_bb, last_stmt (cur_bb));
+
+	      basic_block new_cur_bb = create_empty_bb (cur_bb);
+	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
+
+	      gsi2 = gsi_after_labels (e->dest);
+	      if (rect_p)
+		t = fold_build2 (PLUS_EXPR, type, vs[j],
+				 build_one_cst (type));
+	      else
+		{
+		  tree step
+		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
+		  t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
+		}
+	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+					    true, GSI_SAME_STMT);
+	      expand_omp_build_assign (&gsi2, vs[j], t);
+
+	      edge ne = split_block (e->dest, last_stmt (e->dest));
+	      gsi2 = gsi_after_labels (ne->dest);
+
+	      gcond *cond_stmt;
+	      if (next_bb == entry_bb)
+		/* No need to actually check the outermost condition.  */
+		cond_stmt
+		  = gimple_build_cond (EQ_EXPR, boolean_true_node,
+				       boolean_true_node,
+				       NULL_TREE, NULL_TREE);
+	      else
+		cond_stmt
+		  = gimple_build_cond (rect_p ? LT_EXPR
+					      : fd->loops[j].cond_code,
+				       vs[j], n2, NULL_TREE, NULL_TREE);
+	      gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+	      edge e3, e4;
+	      if (next_bb == entry_bb)
+		{
+		  e3 = find_edge (ne->dest, next_bb);
+		  e3->flags = EDGE_FALSE_VALUE;
+		  dom_bb = ne->dest;
+		}
+	      else
+		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
+	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
+	      e4->probability = profile_probability::likely ().guessed ();
+	      e3->probability = e4->probability.invert ();
+	      basic_block esrc = e->src;
+	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
+	      cur_bb = new_cur_bb;
+	      basic_block latch_bb = next_bb;
+	      next_bb = e->dest;
+	      remove_edge (e);
+	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
+	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
+	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
+	    }
+	  for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
+	    {
+	      tree itype = TREE_TYPE (fd->loops[j].v);
+	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
+			     && fd->loops[j].m2 == NULL_TREE
+			     && !fd->loops[j].non_rect_referenced);
+	      if (j == fd->last_nonrect)
+		{
+		  t = fold_build2 (MINUS_EXPR, type, stopval, idx);
+		  t = fold_convert (itype, t);
+		  tree t2
+		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
+		  t = fold_build2 (MULT_EXPR, itype, t, t2);
+		  t = fold_build2 (PLUS_EXPR, itype, n1, t);
+		}
+	      else if (rect_p)
+		{
+		  t = fold_convert (itype, vs[j]);
+		  t = fold_build2 (MULT_EXPR, itype, t,
+				   fold_convert (itype, fd->loops[j].step));
+		  if (POINTER_TYPE_P (vtype))
+		    t = fold_build_pointer_plus (fd->loops[j].n1, t);
+		  else
+		    t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
+		}
+	      else
+		t = vs[j];
+	      t = force_gimple_operand_gsi (gsi, t, false,
+					    NULL_TREE, true,
+					    GSI_SAME_STMT);
+	      stmt = gimple_build_assign (fd->loops[j].v, t);
+	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+	    }
+	  if (gsi_end_p (*gsi))
+	    *gsi = gsi_last_bb (gsi_bb (*gsi));
+	  else
+	    gsi_prev (gsi);
+	}
       else
-	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
-      t = force_gimple_operand_gsi (gsi, t,
-				    DECL_P (fd->loops[i].v)
-				    && TREE_ADDRESSABLE (fd->loops[i].v),
-				    NULL_TREE, false,
-				    GSI_CONTINUE_LINKING);
-      stmt = gimple_build_assign (fd->loops[i].v, t);
-      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
-      if (i != 0)
+	{
+	  t = fold_convert (itype, t);
+	  t = fold_build2 (MULT_EXPR, itype, t,
+			   fold_convert (itype, fd->loops[i].step));
+	  if (POINTER_TYPE_P (vtype))
+	    t = fold_build_pointer_plus (fd->loops[i].n1, t);
+	  else
+	    t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+	  t = force_gimple_operand_gsi (gsi, t,
+					DECL_P (fd->loops[i].v)
+					&& TREE_ADDRESSABLE (fd->loops[i].v),
+					NULL_TREE, false,
+					GSI_CONTINUE_LINKING);
+	  stmt = gimple_build_assign (fd->loops[i].v, t);
+	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+	}
+      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
 	{
 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
@@ -2010,7 +2481,28 @@  expand_omp_for_init_vars (struct omp_for
 	  stmt = gimple_build_assign (tem, t);
 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
 	}
+      if (i == fd->last_nonrect)
+	i = fd->first_nonrect;
     }
+  if (fd->non_rect)
+    for (i = 0; i <= fd->last_nonrect; i++)
+      if (fd->loops[i].m2)
+	{
+	  tree itype = TREE_TYPE (fd->loops[i].v);
+
+	  tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+	  t = fold_build2 (MULT_EXPR, itype,
+			   fd->loops[i - fd->loops[i].outer].v, t);
+	  t = fold_build2 (PLUS_EXPR, itype, t,
+			   fold_convert (itype,
+					 unshare_expr (fd->loops[i].n2)));
+	  nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
+	  t = force_gimple_operand_gsi (gsi, t, false,
+					NULL_TREE, false,
+					GSI_CONTINUE_LINKING);
+	  stmt = gimple_build_assign (nonrect_bounds[i], t);
+	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+	}
 }
 
 /* Helper function for expand_omp_for_*.  Generate code like:
@@ -2024,11 +2516,38 @@  expand_omp_for_init_vars (struct omp_for
     L12:
 	V2 = N21;
 	V1 += STEP1;
-	goto BODY_BB;  */
+	goto BODY_BB;
+   For non-rectangular loops, use temporaries stored in nonrect_bounds
+   for the upper bounds if M?2 multiplier is present.  Given e.g.
+   for (V1 = N11; V1 cond1 N12; V1 += STEP1)
+   for (V2 = N21; V2 cond2 N22; V2 += STEP2)
+   for (V3 = N31; V3 cond3 N32; V3 += STEP3)
+   for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
+   do:
+    L10:
+	V4 += STEP4;
+	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
+    L11:
+	V4 = N41 + M41 * V2; // This can be left out if the loop
+			     // refers to the immediate parent loop
+	V3 += STEP3;
+	if (V3 cond3 N32) goto BODY_BB; else goto L12;
+    L12:
+	V3 = N31;
+	V2 += STEP2;
+	if (V2 cond2 N22) goto L120; else goto L13;
+    L120:
+	V4 = N41 + M41 * V2;
+	NONRECT_BOUND4 = N42 + M42 * V2;
+	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
+    L13:
+	V2 = N21;
+	V1 += STEP1;
+	goto L120;  */
 
 static basic_block
-extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
-			     basic_block body_bb)
+extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
+			     basic_block cont_bb, basic_block body_bb)
 {
   basic_block last_bb, bb, collapse_bb = NULL;
   int i;
@@ -2049,17 +2568,28 @@  extract_omp_for_update_vars (struct omp_
       if (i < fd->collapse - 1)
 	{
 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
-	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
+	  e->probability
+	    = profile_probability::guessed_always ().apply_scale (1, 8);
 
-	  t = fd->loops[i + 1].n1;
-	  t = force_gimple_operand_gsi (&gsi, t,
-					DECL_P (fd->loops[i + 1].v)
-					&& TREE_ADDRESSABLE (fd->loops[i
-								       + 1].v),
-					NULL_TREE, false,
-					GSI_CONTINUE_LINKING);
-	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
-	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+	  struct omp_for_data_loop *l = &fd->loops[i + 1];
+	  if (l->m1 == NULL_TREE || l->outer != 1)
+	    {
+	      t = l->n1;
+	      if (l->m1)
+		{
+		  tree t2
+		    = fold_build2 (MULT_EXPR, TREE_TYPE (t),
+				   fd->loops[i + 1 - l->outer].v, l->m1);
+		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
+		}
+	      t = force_gimple_operand_gsi (&gsi, t,
+					    DECL_P (l->v)
+					    && TREE_ADDRESSABLE (l->v),
+					    NULL_TREE, false,
+					    GSI_CONTINUE_LINKING);
+	      stmt = gimple_build_assign (l->v, t);
+	      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+	    }
 	}
       else
 	collapse_bb = bb;
@@ -2077,9 +2607,84 @@  extract_omp_for_update_vars (struct omp_
       stmt = gimple_build_assign (fd->loops[i].v, t);
       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
+      if (fd->loops[i].non_rect_referenced)
+	{
+	  basic_block update_bb = NULL, prev_bb = NULL;
+	  for (int j = i + 1; j <= fd->last_nonrect; j++)
+	    if (j - fd->loops[j].outer == i)
+	      {
+		tree n1, n2;
+		struct omp_for_data_loop *l = &fd->loops[j];
+		basic_block this_bb = create_empty_bb (last_bb);
+		add_bb_to_loop (this_bb, last_bb->loop_father);
+		gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
+		if (prev_bb)
+		  {
+		    e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
+		    e->probability
+		      = profile_probability::guessed_always ().apply_scale (7,
+									    8);
+		    set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
+
+		  }
+		if (l->m1)
+		  {
+		    t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
+				     fd->loops[i].v);
+		    t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
+		    n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+						   false,
+						   GSI_CONTINUE_LINKING);
+		    stmt = gimple_build_assign (l->v, n1);
+		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+		    n1 = l->v;
+		  }
+		else
+		  n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
+						 NULL_TREE, false,
+						 GSI_CONTINUE_LINKING);
+		if (l->m2)
+		  {
+		    t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
+				     fd->loops[i].v);
+		    t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
+				     t, unshare_expr (l->n2));
+		    n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+						   false,
+						   GSI_CONTINUE_LINKING);
+		    stmt = gimple_build_assign (nonrect_bounds[j], n2);
+		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+		    n2 = nonrect_bounds[j];
+		  }
+		else
+		  n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
+						 true, NULL_TREE, false,
+						 GSI_CONTINUE_LINKING);
+		gcond *cond_stmt
+		  = gimple_build_cond (l->cond_code, n1, n2,
+				       NULL_TREE, NULL_TREE);
+		gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
+		if (update_bb == NULL)
+		  update_bb = this_bb;
+		e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
+		e->probability
+		  = profile_probability::guessed_always ().apply_scale (1, 8);
+		if (prev_bb == NULL)
+		  set_immediate_dominator (CDI_DOMINATORS, this_bb, last_bb);
+		prev_bb = this_bb;
+	      }
+	  e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
+	  e->probability
+	    = profile_probability::guessed_always ().apply_scale (7, 8);
+	  body_bb = update_bb;
+	}
+
       if (i > 0)
 	{
-	  t = fd->loops[i].n2;
+	  if (fd->loops[i].m2)
+	    t = nonrect_bounds[i];
+	  else
+	    t = unshare_expr (fd->loops[i].n2);
 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
 					false, GSI_CONTINUE_LINKING);
 	  tree v = fd->loops[i].v;
@@ -2099,6 +2704,7 @@  extract_omp_for_update_vars (struct omp_
 	}
       else
 	make_edge (bb, body_bb, EDGE_FALLTHRU);
+      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
       last_bb = bb;
     }
 
@@ -3180,7 +3786,7 @@  expand_omp_for_generic (struct omp_regio
 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
 	}
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   if (fd->ordered)
     {
@@ -3327,7 +3933,7 @@  expand_omp_for_generic (struct omp_regio
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
+	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
 
       /* Emit code to get the next parallel iteration in L2_BB.  */
       gsi = gsi_start_bb (l2_bb);
@@ -4111,6 +4717,7 @@  expand_omp_for_static_nochunk (struct om
     }
   /* Handle linear clause adjustments.  */
   tree itercnt = NULL_TREE;
+  tree *nonrect_bounds = NULL;
   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
 	 c; c = OMP_CLAUSE_CHAIN (c))
@@ -4153,7 +4760,15 @@  expand_omp_for_static_nochunk (struct om
 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
 	}
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    {
+      if (fd->non_rect)
+	{
+	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
+	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
+	}
+      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
+				startvar);
+    }
 
   if (!broken_loop)
     {
@@ -4205,7 +4820,8 @@  expand_omp_for_static_nochunk (struct om
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
+						   cont_bb, body_bb);
     }
 
   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
@@ -4876,7 +5492,7 @@  expand_omp_for_static_chunk (struct omp_
 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
 	}
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   if (!broken_loop)
     {
@@ -4931,7 +5547,7 @@  expand_omp_for_static_chunk (struct omp_
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
 
       /* Trip update code goes into TRIP_UPDATE_BB.  */
       gsi = gsi_start_bb (trip_update_bb);
@@ -5331,7 +5947,7 @@  expand_omp_simd (struct omp_region *regi
       if (gimple_omp_for_combined_into_p (fd->for_stmt))
 	{
 	  gsi_prev (&gsi);
-	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
+	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1);
 	  gsi_next (&gsi);
 	}
       else
@@ -5704,7 +6320,7 @@  expand_omp_taskloop_for_outer (struct om
   assign_stmt = gimple_build_assign (endvar, t1);
   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   /* Remove the GIMPLE_OMP_FOR statement.  */
   gsi = gsi_for_stmt (for_stmt);
@@ -5860,7 +6476,7 @@  expand_omp_taskloop_for_inner (struct om
       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
     }
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   if (!broken_loop)
     {
@@ -5895,7 +6511,7 @@  expand_omp_taskloop_for_inner (struct om
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
     }
 
   /* Remove the GIMPLE_OMP_FOR statement.  */
@@ -6556,7 +7172,9 @@  expand_omp_for (struct omp_region *regio
   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
 	   && !fd.have_ordered)
     {
-      if (fd.non_rect)
+      if (fd.non_rect
+	  && (gimple_omp_for_combined_into_p (fd.for_stmt)
+	      || gimple_omp_for_combined_p (fd.for_stmt)))
 	sorry_at (gimple_location (fd.for_stmt),
 		  "non-rectangular OpenMP loops not supported yet");
       if (fd.chunk_size == NULL)
--- libgomp/testsuite/libgomp.c/loop-17.c.jj	2020-06-26 19:18:50.400479189 +0200
+++ libgomp/testsuite/libgomp.c/loop-17.c	2020-06-26 19:48:16.660675237 +0200
@@ -0,0 +1,195 @@ 
+/* { dg-do run } */
+
+/* Test worksharing-loop ("omp for") handling of non-rectangular loop
+   nests (inner loop bounds depending on outer iteration variables)
+   with collapse(7)/collapse(5)/collapse(3), using both compile-time
+   constant and runtime (volatile) bounds, and both incrementing and
+   decrementing loops.  */
+
+extern void abort (void);
+
+signed char v[5][7][9][21][4][42][3];
+volatile int zero = 0, one = 1, two = 2, three = 3;
+volatile int five = 5, seven = 7, nine = 9, eleven = 11;
+
+int
+main ()
+{
+  for (int i = 0; i < 5; i++)
+  for (int j = 0; j < 7; j++)
+  for (int k = 0; k < 9; k++)
+  for (int l = 2 * j; l < 3 * j; l++)
+  for (int m = 7; m < 11; m++)
+  for (int n = l; n < 2 * l; n++)
+  for (int o = 0; o < 3; o++)
+    v[i][j][k][l][m - 7][n][o] = 1;
+
+  int niters = 0;
+  #pragma omp parallel
+  #pragma omp for collapse(7) reduction(+:niters)
+  for (int i = 0; i < 5; i++)
+  for (int j = 0; j < 7; j++)
+  for (int k = 0; k < 9; k++)
+  for (int l = 2 * j; l < 3 * j; l++)
+  for (int m = 7; m < 11; m++)
+  for (int n = l; n < 2 * l; n++)
+  for (int o = 0; o < 3; o++)
+    {
+      niters++;
+      if (i < 0 || i >= 5
+	  || j < 0 || j >= 7
+	  || k < 0 || k >= 9
+	  || l < 2 * j || l >= 3 * j
+	  || m < 7 || m >= 11
+	  || n < l || n >= 2 * l
+	  || o < 0 || o >= 3)
+	abort ();
+      if (v[i][j][k][l][m - 7][n][o] != 1)
+	abort ();
+      v[i][j][k][l][m - 7][n][o]++;
+    }
+
+  if (niters != 117180)
+    abort ();
+
+  int niters2 = 0;
+  #pragma omp parallel
+  #pragma omp for collapse(7) reduction(+:niters2)
+  for (int i = zero; i < five; i += one)
+  for (int j = seven - one; j >= zero; j -= one)
+  for (int k = nine - one; k >= zero; k += -one)
+  for (int l = two * j + zero; l < three * j; l += one)
+  for (int m = eleven - one; m >= seven; m -= one)
+  for (int n = two * l - one; n > one * l - one; n -= one)
+  for (int o = zero; o < three; o += one)
+    {
+      niters2++;
+      if (i < 0 || i >= 5
+	  || j < 0 || j >= 7
+	  || k < 0 || k >= 9
+	  || l < 2 * j || l >= 3 * j
+	  || m < 7 || m >= 11
+	  || n < l || n >= 2 * l
+	  || o < 0 || o >= 3)
+	abort ();
+      if (v[i][j][k][l][m - 7][n][o] != 2)
+	abort ();
+      v[i][j][k][l][m - 7][n][o]++;
+    }
+
+  if (niters2 != 117180)
+    abort ();
+
+  for (int i = 0; i < 5; i++)
+  for (int j = 0; j < 7; j++)
+  for (int k = 0; k < 9; k++)
+  for (int l = 2 * j; l < 3 * j; l++)
+  for (int m = 7; m < 11; m++)
+  for (int n = l; n < 2 * l; n++)
+  for (int o = 0; o < 3; o++)
+    if (v[i][j][k][l][m - 7][n][o] != 3)
+      abort ();
+
+  int niters3 = 0;
+  #pragma omp parallel
+  #pragma omp for collapse(5) reduction(+:niters3)
+  for (int i = 4; i >= 0; i--)
+  for (int j = 6; j >= 0; --j)
+  for (int l = 3 * j - 1; l >= 2 * j; l--)
+  for (int n = 2 * l + -1; n > l - 1; --n)
+  for (int o = 2; o >= 0; o--)
+    {
+      niters3++;
+      if (i < 0 || i >= 5
+	  || j < 0 || j >= 7
+	  || l < 2 * j || l >= 3 * j
+	  || n < l || n >= 2 * l
+	  || o < 0 || o >= 3)
+	abort ();
+      if (v[i][j][0][l][0][n][o] != 3)
+	abort ();
+      v[i][j][0][l][0][n][o]++;
+    }
+
+  if (niters3 != 3255)
+    abort ();
+
+  int niters4 = 0;
+  #pragma omp parallel
+  #pragma omp for collapse(5) reduction(+:niters4)
+  for (int i = zero; i < five; i += one)
+  for (int j = zero; j <= seven - one; j += one)
+  for (int l = j * two; l < three * j + zero; l += one)
+  for (int n = one * l; n <= l * two - one; n += one)
+  for (int o = zero; o < three; o += one)
+    {
+      niters4++;
+      if (i < 0 || i >= 5
+	  || j < 0 || j >= 7
+	  || l < 2 * j || l >= 3 * j
+	  || n < l || n >= 2 * l
+	  || o < 0 || o >= 3)
+	abort ();
+      if (v[i][j][0][l][0][n][o] != 4)
+	abort ();
+      v[i][j][0][l][0][n][o]++;
+    }
+
+  if (niters4 != 3255)
+    abort ();
+
+  for (int i = 0; i < 5; i++)
+  for (int j = 0; j < 7; j++)
+  for (int l = 2 * j; l < 3 * j; l++)
+  for (int n = l; n < 2 * l; n++)
+  for (int o = 0; o < 3; o++)
+    if (v[i][j][0][l][0][n][o] != 5)
+      abort ();
+
+  int niters5 = 0;
+  #pragma omp parallel
+  #pragma omp for collapse(3) reduction(+:niters5)
+  for (int j = 6; j >= 0; --j)
+  for (int l = 2 * j; l <= 3 * j - 1; l++)
+  for (int n = 2 * l + -1; n > l - 1; --n)
+    {
+      niters5++;
+      if (j < 0 || j >= 7
+	  || l < 2 * j || l >= 3 * j
+	  || n < l || n >= 2 * l)
+	abort ();
+      if (v[0][j][0][l][0][n][0] != 5)
+	abort ();
+      v[0][j][0][l][0][n][0]++;
+    }
+
+  if (niters5 != 217)
+    abort ();
+
+  int niters6 = 0;
+  #pragma omp parallel
+  #pragma omp for collapse(3) reduction(+:niters6)
+  for (int j = seven - one; j > - one; j -= one)
+  for (int l = j * three - one; l >= j * two + zero; l += -one)
+  for (int n = two * l - one; n > l - one; n -= one)
+    {
+      niters6++;
+      if (j < 0 || j >= 7
+	  || l < 2 * j || l >= 3 * j
+	  || n < l || n >= 2 * l)
+	abort ();
+      if (v[0][j][0][l][0][n][0] != 6)
+	abort ();
+      v[0][j][0][l][0][n][0]++;
+    }
+
+  if (niters6 != 217)
+    abort ();
+
+  for (int j = 0; j < 7; j++)
+  for (int l = 2 * j; l < 3 * j; l++)
+  for (int n = l; n < 2 * l; n++)
+    if (v[0][j][0][l][0][n][0] != 7)
+      abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/loop-18.c.jj	2020-06-27 12:24:00.098807006 +0200
+++ libgomp/testsuite/libgomp.c/loop-18.c	2020-06-27 12:24:23.699473452 +0200
@@ -0,0 +1,252 @@ 
+/* { dg-do run } */
+
+/* Test collapse(2) worksharing loops over non-rectangular loop nests
+   with lastprivate and reduction clauses.  Each nest is run twice:
+   once with compile-time constant bounds and once with the same
+   bounds read from the volatile variables a..h.  Some lastprivate
+   checks are commented out below: after a non-rectangular loop the
+   lastprivate iterator values need not match serial execution yet.  */
+
+extern void abort (void);
+
+int x, i, j;
+volatile int a, b, c, d, e, f, g, h;
+int k[11][101];
+
+int
+main ()
+{
+  int niters;
+  for (i = 1; i <= 10; i++)
+    for (j = 1; j <= 10 * i; j++)
+      k[i][j] = 1;
+  a = 1; b = 11; c = 1; d = 0; e = 1; f = 10; g = 1; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 1; i <= 10; i++)
+    for (j = 1; j <= 10 * i; j++)
+      {
+	if (i < 1 || i > 10 || j < 1 || j > 10 * i || k[i][j] != 1)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 11 || j != 101 || x != 10340 || niters != 550)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 1 || i > 10 || j < 1 || j > 10 * i || k[i][j] != 2)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 11 || j != 101 || x != 10340 || niters != 550)
+    abort ();
+  for (i = 1; i <= 10; i++)
+    for (j = 1; j <= 10 * i; j++)
+      if (k[i][j] == 3)
+	k[i][j] = 0;
+      else
+	abort ();
+  for (i = 0; i < 11; i++)
+    for (j = 0; j < 101; j++)
+      if (k[i][j] != 0)
+	abort ();
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 10 * i; j++)
+      k[i][j] = 1;
+  a = 0; b = 10; c = 1; d = 0; e = 0; f = 10; g = 0; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 10 * i; j++)
+      {
+	if (i < 0 || i >= 10 || j < 0 || j >= 10 * i || k[i][j] != 1)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 10 || j != 90 || x != 9305 || niters != 450)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 0 || i >= 10 || j < 0 || j >= 10 * i || k[i][j] != 2)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 10 || j != 90 || x != 9305 || niters != 450)
+    abort ();
+  for (i = 0; i < 10; i++)
+    for (j = 0; j < 10 * i; j++)
+      if (k[i][j] == 3)
+	k[i][j] = 0;
+      else
+	abort ();
+  for (i = 0; i < 11; i++)
+    for (j = 0; j < 101; j++)
+      if (k[i][j] != 0)
+	abort ();
+  for (i = 4; i < 10; i++)
+    for (j = -9 + 2 * i; j < i; j++)
+      k[i][j + 1] = 1;
+  a = 4; b = 10; c = 1; d = 2; e = -9; f = 1; g = 0; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 4; i < 10; i++)
+    for (j = -9 + 2 * i; j < i; j++)
+      {
+	if (i < 4 || i >= 10 || j < -9 + 2 * i || j >= i || k[i][j + 1] != 1)
+	  abort ();
+	k[i][j + 1]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (/*i != 10 || j != 9 || */x != 8199 || niters != 15)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 4 || i >= 10 || j < -9 + 2 * i || j >= i || k[i][j + 1] != 2)
+	  abort ();
+	k[i][j + 1]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (/*i != 10 || j != 9 || */x != 8199 || niters != 15)
+    abort ();
+  for (i = 4; i < 10; i++)
+    for (j = -9 + 2 * i; j < i; j++)
+      if (k[i][j + 1] == 3)
+	k[i][j + 1] = 0;
+      else
+	abort ();
+  for (i = 0; i < 11; i++)
+    for (j = 0; j < 101; j++)
+      if (k[i][j] != 0)
+	abort ();
+  for (i = 1; i < 10; i += 2)
+    for (j = 1; j < i + 1; j++)
+      k[i][j] = 1;
+  a = 1; b = 10; c = 2; d = 0; e = 1; f = 1; g = 1; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 1; i < 10; i += 2)
+    for (j = 1; j < i + 1; j++)
+      {
+	if (i < 1 || i >= 10 || j < 1 || j >= i + 1 || k[i][j] != 1)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 11 || j != 10 || x != 9225 || niters != 25)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i < 1 || i >= 10 || j < 1 || j >= i + 1 || k[i][j] != 2)
+	  abort ();
+	k[i][j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 11 || j != 10 || x != 9225 || niters != 25)
+    abort ();
+  for (i = 1; i < 10; i += 2)
+    for (j = 1; j < i + 1; j++)
+      if (k[i][j] == 3)
+	k[i][j] = 0;
+      else
+	abort ();
+  for (i = 0; i < 11; i++)
+    for (j = 0; j < 101; j++)
+      if (k[i][j] != 0)
+	abort ();
+  for (j = -11; j >= -41; j -= 15)
+    k[0][-j] = 1;
+  a = 4; b = 8; c = 12; d = -8; e = -9; f = -3; g = 6; h = 15;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = 4; i < 8; i += 12)
+    for (j = -8 * i - 9; j < i * -3 + 6; j += 15)
+      {
+	if (i != 4 || j < -41 || j > -11 || k[0][-j] != 1)
+	  abort ();
+	k[0][-j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 16 || j != 4 || x != 5109 || niters != 3)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i != 4 || j < -41 || j > -11 || k[0][-j] != 2)
+	  abort ();
+	k[0][-j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (i != 16 || j != 4 || x != 5109 || niters != 3)
+    abort ();
+  for (j = -11; j >= -41; j -= 15)
+    if (k[0][-j] == 3)
+      k[0][-j] = 0;
+    else
+      abort ();
+  for (j = -11; j >= -41; j--)
+    if (k[0][-j] != 0)
+      abort ();
+  for (j = -34; j <= -7; j++)
+    k[0][-j] = 1;
+  a = -13; b = 7; c = 12; d = 3; e = 5; f = 0; g = -6; h = 1;
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = -13; i < 7; i += 12)
+    for (j = 3 * i + 5; j < -6; j++)
+      {
+	if (i != -13 || j < -34 || j > -7 || k[0][-j] != 1)
+	  abort ();
+	k[0][-j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (/*i != 11 || j != 2 || */x != -12295 || niters != 28)
+    abort ();
+  niters = 0; i = -100; j = -100; x = -100;
+  #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
+  for (i = a; i < b; i += c)
+    for (j = d * i + e; j < g + i * f; j += h)
+      {
+	if (i != -13 || j < -34 || j > -7 || k[0][-j] != 2)
+	  abort ();
+	k[0][-j]++;
+	x = i * 1024 + (j & 1023);
+	niters++;
+      }
+  if (/*i != 11 || j != 2 || */x != -12295 || niters != 28)
+    abort ();
+  for (j = -34; j <= -7; j++)
+    if (k[0][-j] == 3)
+      k[0][-j] = 0;
+    else
+      abort ();
+  return 0;
+}