tree-optimization/95897 - fix fold-left SLP reduction insert place

Message ID nycvar.YFH.7.76.2006261014200.4397@zhemvz.fhfr.qr
State New
Headers show
Series
  • tree-optimization/95897 - fix fold-left SLP reduction insert place
Related show

Commit Message

Richard Biener June 26, 2020, 8:14 a.m.
This fixes the computation of the insertion place for fold-left SLP
reductions, whose reduction PHIs do not have vectorized stmts.  The
SLP representation isn't perfect here, hence the following workaround.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

2020-06-26  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/95897
	* tree-vectorizer.h (vectorizable_induction): Remove
	unused gimple_stmt_iterator * parameter.
	* tree-vect-loop.c (vectorizable_induction): Likewise.
	(vect_analyze_loop_operations): Adjust.
	* tree-vect-stmts.c (vect_analyze_stmt): Likewise.
	(vect_transform_stmt): Likewise.
	* tree-vect-slp.c (vect_schedule_slp_instance): Adjust
	for fold-left reductions, clarify existing reduction case.

	* gcc.dg/vect/pr95897.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr95897.c | 13 +++++++++++++
 gcc/tree-vect-loop.c                |  3 +--
 gcc/tree-vect-slp.c                 | 25 +++++++++++++++++++++++--
 gcc/tree-vect-stmts.c               |  4 ++--
 gcc/tree-vectorizer.h               |  1 -
 5 files changed, 39 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr95897.c

-- 
2.26.2

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr95897.c b/gcc/testsuite/gcc.dg/vect/pr95897.c
new file mode 100644
index 00000000000..a17b72dd040
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr95897.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+
+double foo (double x, int n)
+{
+  double s = 0.;
+  for (int i = 0; i < n; ++i)
+    {
+      s += x;
+      s += x;
+      s += x;
+    }
+  return s;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 08c9f119626..bc913eeeb36 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1573,7 +1573,7 @@  vect_analyze_loop_operations (loop_vec_info loop_vinfo)
               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
 		  && ! PURE_SLP_STMT (stmt_info))
 		ok = vectorizable_induction (loop_vinfo,
-					     stmt_info, NULL, NULL, NULL,
+					     stmt_info, NULL, NULL,
 					     &cost_vec);
 	      else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
 			|| (STMT_VINFO_DEF_TYPE (stmt_info)
@@ -7285,7 +7285,6 @@  vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
 bool
 vectorizable_induction (loop_vec_info loop_vinfo,
 			stmt_vec_info stmt_info,
-			gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
 			gimple **vec_stmt, slp_tree slp_node,
 			stmt_vector_for_cost *cost_vec)
 {
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 4d3de77988d..47498b1d417 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -4331,8 +4331,15 @@  vect_schedule_slp_instance (vec_info *vinfo,
       si = gsi_for_stmt (last_stmt_info->stmt);
     }
   else if (SLP_TREE_CHILDREN (node).is_empty ())
-    /* This happens for reduction PHIs.  */
-    si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)->stmt);
+    {
+      /* This happens for reduction and induction PHIs where we do not use the
+	 insertion iterator.  */
+      gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
+		  == cycle_phi_info_type
+		  || (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
+		      == induc_vec_info_type));
+      si = gsi_none ();
+    }
   else
     {
       /* Emit other stmts after the children vectorized defs which is
@@ -4341,6 +4348,20 @@  vect_schedule_slp_instance (vec_info *vinfo,
       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
 	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
 	  {
+	    /* For fold-left reductions we are retaining the scalar
+	       reduction PHI but we still have SLP_TREE_NUM_VEC_STMTS
+	       set so the representation isn't perfect.  Resort to the
+	       last scalar def here.  */
+	    if (SLP_TREE_VEC_STMTS (child).is_empty ())
+	      {
+		gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child))
+			    == cycle_phi_info_type);
+		gphi *phi = as_a <gphi *>
+			      (vect_find_last_scalar_stmt_in_slp (child)->stmt);
+		if (!last_stmt
+		    || vect_stmt_dominates_stmt_p (last_stmt, phi))
+		  last_stmt = phi;
+	      }
 	    /* We are emitting all vectorized stmts in the same place and
 	       the last one is the last.
 	       ???  Unless we have a load permutation applied and that
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ab1e70a50c3..7cd39f0f23d 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10549,7 +10549,7 @@  vect_analyze_stmt (vec_info *vinfo,
 	  || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
 				     node, node_instance, cost_vec)
 	  || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
-				     NULL, NULL, node, cost_vec)
+				     NULL, node, cost_vec)
 	  || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
 	  || vectorizable_condition (vinfo, stmt_info,
 				     NULL, NULL, node, cost_vec)
@@ -10631,7 +10631,7 @@  vect_transform_stmt (vec_info *vinfo,
 
     case induc_vec_info_type:
       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
-				     stmt_info, gsi, &vec_stmt, slp_node,
+				     stmt_info, &vec_stmt, slp_node,
 				     NULL);
       gcc_assert (done);
       break;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index e4d132493ca..d9f6a67264d 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1967,7 +1967,6 @@  extern bool vectorizable_reduction (loop_vec_info, stmt_vec_info,
 				    slp_tree, slp_instance,
 				    stmt_vector_for_cost *);
 extern bool vectorizable_induction (loop_vec_info, stmt_vec_info,
-				    gimple_stmt_iterator *,
 				    gimple **, slp_tree,
 				    stmt_vector_for_cost *);
 extern bool vect_transform_reduction (loop_vec_info, stmt_vec_info,