[COMMITTED] tree-optimization/96542 - Add wi_fold_in_parts.

Message ID 618a0878-d150-d82b-4181-14ea236d337e@redhat.com
State New
Headers show
Series
  • [COMMITTED] tree-optimization/96542 - Add wi_fold_in_parts.
Related show

Commit Message

Ian Lance Taylor via Gcc-patches July 17, 2021, 2:31 a.m.
Range-ops uses wi_fold (implemented by each opcode)  to individually 
fold subranges one at a time and then combines them. This patch first 
calls wi_fold_in_parts which checks if one of the subranges is small, 
and if so, further splits that subrange into constants.

Currently, if a subrange has 4 or fewer values, then we call it 
individually for each of those values. 4 was chosen as a reasonable 
tradeoff between excess work and benefit.  We could consider 
increasing that number for -O3 perhaps.

This allows us under some circumstances to generate much more precise 
values. For instance:

     tmp_5 = x_4(D) != 0;
     _1 = (int) tmp_5;
     _2 = 255 >> _1;
     _3 = (unsigned char) _2;
     _6 = _3 * 2;
     return _6;

_1 : int [0, 1]
_2 : int [127, 255]
_3 : unsigned char [127, +INF]

we currently produce a range of [127,255] for _2.   The RHS of the shift 
(_1) only has 2 values, so by further splitting that subrange into [0,0] 
and [1,1], we can produce a more precise range, and do better:

     y_5 = x_4(D) != 0;
     _1 = (int) y_5;
     _2 = 255 >> _1;
     _3 = (unsigned char) _2;
     return 254;

_1 : int [0, 1]
_2 : int [127, 127][255, 255]
_3 : unsigned char [127, 127][+INF, +INF]

Now _2 is a perfect 127 or 255, and we can fold this function down to 
just the return value.

This will work for many different opcodes without having to go and 
rework range-ops for them.

Bootstraps on x86_64-pc-linux-gnu  with no regressions and fixes all 3 
testcases in the PR.  pushed.

Andrew

Patch

commit 704e8a825c78b9a8424c291509413bbb48e602c7
Author: Andrew MacLeod <amacleod@redhat.com>
Date:   Fri Jul 16 11:42:14 2021 -0400

    Add wi_fold_in_parts.
    
    range-ops uses wi_fold to individually fold subranges one at a time and
    then combines them.  This patch first calls wi_fold_in_parts which checks if
    one of the subranges is small, and if so, further splits that subrange
    into constants.
    
            gcc/
            PR tree-optimization/96542
            * range-op.cc (range_operator::wi_fold_in_parts): New.
            (range_operator::fold_range): Call wi_fold_in_parts.
            (operator_lshift::wi_fold): Fix broken lshift by [0,0].
            * range-op.h (wi_fold_in_parts): Add prototype.
    
            gcc/testsuite
            * gcc.dg/pr96542.c: New.

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 08000465fd9..e0be51dbc90 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -133,6 +133,65 @@  range_operator::wi_fold (irange &r, tree type,
   r.set_varying (type);
 }
 
+// Call wi_fold, except further split small subranges into constants.
+// This can provide better precision. For something   8 << [0,1]
+// Instead of [8, 16], we will produce [8,8][16,16]
+
+void
+range_operator::wi_fold_in_parts (irange &r, tree type,
+				  const wide_int &lh_lb,
+				  const wide_int &lh_ub,
+				  const wide_int &rh_lb,
+				  const wide_int &rh_ub) const
+{
+  wi::overflow_type ov_rh, ov_lh;
+  int_range_max tmp;
+  wide_int rh_range = wi::sub (rh_ub, rh_lb, TYPE_SIGN (type), &ov_rh);
+  wide_int lh_range = wi::sub (lh_ub, lh_lb, TYPE_SIGN (type), &ov_lh);
+  signop sign = TYPE_SIGN (type);;
+  // If there are 2, 3, or 4 values in the RH range, do them separately.
+  // Call wi_fold_in_parts to check the RH side.
+  if (wi::gt_p (rh_range, 0, sign) && wi::lt_p (rh_range, 4, sign)
+      && ov_rh == wi::OVF_NONE)
+    {
+      wi_fold_in_parts (r, type, lh_lb, lh_ub, rh_lb, rh_lb);
+      if (wi::gt_p (rh_range, 1, sign))
+	{
+	  wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb + 1, rh_lb + 1);
+	  r.union_ (tmp);
+	  if (wi::eq_p (rh_range, 3))
+	    {
+	      wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb + 2, rh_lb + 2);
+	      r.union_ (tmp);
+	    }
+	}
+      wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_ub, rh_ub);
+      r.union_ (tmp);
+    }
+  // Otherwise check for 2, 3, or 4 values in the LH range and split them up.
+  // The RH side has been checked, so no recursion needed.
+  else if (wi::gt_p (lh_range, 0, sign) && wi::lt_p (lh_range, 4, sign)
+	   && ov_lh == wi::OVF_NONE)
+    {
+      wi_fold (r, type, lh_lb, lh_lb, rh_lb, rh_ub);
+      if (wi::gt_p (lh_range, 1, sign))
+	{
+	  wi_fold (tmp, type, lh_lb + 1, lh_lb + 1, rh_lb, rh_ub);
+	  r.union_ (tmp);
+	  if (wi::eq_p (lh_range, 3))
+	    {
+	      wi_fold (tmp, type, lh_lb + 2, lh_lb + 2, rh_lb, rh_ub);
+	      r.union_ (tmp);
+	    }
+	}
+      wi_fold (tmp, type, lh_ub, lh_ub, rh_lb, rh_ub);
+      r.union_ (tmp);
+    }
+  // Otherwise just call wi_fold.
+  else
+    wi_fold (r, type, lh_lb, lh_ub, rh_lb, rh_ub);
+}
+
 // The default for fold is to break all ranges into sub-ranges and
 // invoke the wi_fold method on each sub-range pair.
 
@@ -152,8 +211,8 @@  range_operator::fold_range (irange &r, tree type,
   // If both ranges are single pairs, fold directly into the result range.
   if (num_lh == 1 && num_rh == 1)
     {
-      wi_fold (r, type, lh.lower_bound (0), lh.upper_bound (0),
-	       rh.lower_bound (0), rh.upper_bound (0));
+      wi_fold_in_parts (r, type, lh.lower_bound (0), lh.upper_bound (0),
+			rh.lower_bound (0), rh.upper_bound (0));
       op1_op2_relation_effect (r, type, lh, rh, rel);
       return true;
     }
@@ -167,7 +226,7 @@  range_operator::fold_range (irange &r, tree type,
 	wide_int lh_ub = lh.upper_bound (x);
 	wide_int rh_lb = rh.lower_bound (y);
 	wide_int rh_ub = rh.upper_bound (y);
-	wi_fold (tmp, type, lh_lb, lh_ub, rh_lb, rh_ub);
+	wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb, rh_ub);
 	r.union_ (tmp);
 	if (r.varying_p ())
 	  {
@@ -1915,8 +1974,14 @@  operator_lshift::wi_fold (irange &r, tree type,
   int bound_shift = overflow_pos - rh_ub.to_shwi ();
   // If bound_shift == HOST_BITS_PER_WIDE_INT, the llshift can
   // overflow.  However, for that to happen, rh.max needs to be zero,
-  // which means rh is a singleton range of zero, which means it
-  // should be handled by the lshift fold_range above.
+  // which means rh is a singleton range of zero, which means we simply return
+  // [lh_lb, lh_ub] as the range.
+  if (wi::eq_p (rh_ub, rh_lb) && wi::eq_p (rh_ub, 0))
+    {
+      r = int_range<2> (type, lh_lb, lh_ub);
+      return;
+    }
+
   wide_int bound = wi::set_bit_in_zero (bound_shift, prec);
   wide_int complement = ~(bound - 1);
   wide_int low_bound, high_bound;
diff --git a/gcc/range-op.h b/gcc/range-op.h
index 2b5db64bb98..17be9e00c08 100644
--- a/gcc/range-op.h
+++ b/gcc/range-op.h
@@ -97,6 +97,12 @@  protected:
 					const irange &op1_range,
 					const irange &op2_range,
 					relation_kind rel) const;
+  // Called by fold range to split small subranges into parts.
+  void wi_fold_in_parts (irange &r, tree type,
+			 const wide_int &lh_lb,
+			 const wide_int &lh_ub,
+			 const wide_int &rh_lb,
+			 const wide_int &rh_ub) const;
 };
 
 extern range_operator *range_op_handler (enum tree_code code, tree type);
diff --git a/gcc/testsuite/gcc.dg/pr96542.c b/gcc/testsuite/gcc.dg/pr96542.c
new file mode 100644
index 00000000000..5014f2acad8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr96542.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+
+unsigned char
+foo (unsigned int x)
+{
+  _Bool y = x;
+  return (((unsigned char) ~0) >> y) * 2;
+}
+
+unsigned char
+bar (unsigned int x)
+{
+  return (((unsigned char) ~0) >> (_Bool) x) * 2;
+}
+
+unsigned
+baz (unsigned int x)
+{
+  if (x >= 4) return 32;
+  return (-1U >> x) * 16;
+}
+
+/* { dg-final { scan-tree-dump-times  "254" 2 "evrp" } }  */
+/* { dg-final { scan-tree-dump "= PHI <32.*, 4294967280" "evrp" } }  */
+