Fix scaling of profiles in ipa_merge_profiles

Message ID 20191128142215.d7tsr6jtt5llvv6j@kam.mff.cuni.cz
State New
Headers show
Series
  • Fix scaling of profiles in ipa_merge_profiles
Related show

Commit Message

Jan Hubicka Nov. 28, 2019, 2:22 p.m.
Hi
this patch fixes two problems in ipa_merge_profiles.  First, we allow the cfg
profile to diverge from the cgraph profile, and prior to summing cfg profiles we
must compensate for this divergence.

Second, the function is trying to preserve as much information as possible (for
example to handle cases where one function has a guessed profile and the other
function has an IPA profile), but it does so independently on each profile
counter, which is not good since all type transitions must be done the same way
in order for the resulting profile to be meaningful.

In particular, the code sometimes makes node->count a global count
while some edges get global0 counters, which is not meaningful and leads to an
ICE with the sanity checking I want to commit incrementally.

profiledbootstrapped x86_64-linux, committed.

	* ipa-utils.c (ipa_merge_profiles): Be sure that all type transitions
	of counters are done the same way.

Patch

Index: ipa-utils.c
===================================================================
--- ipa-utils.c	(revision 278681)
+++ ipa-utils.c	(working copy)
@@ -398,6 +398,7 @@  ipa_merge_profiles (struct cgraph_node *
   tree oldsrcdecl = src->decl;
   struct function *srccfun, *dstcfun;
   bool match = true;
+  bool copy_counts = false;
 
   if (!src->definition
       || !dst->definition)
@@ -429,10 +430,26 @@  ipa_merge_profiles (struct cgraph_node *
     }
   profile_count orig_count = dst->count;
 
-  if (dst->count.initialized_p () && dst->count.ipa () == dst->count)
-    dst->count += src->count.ipa ();
-  else 
-    dst->count = src->count.ipa ();
+  /* Either sum the profiles if both are IPA and not global0, or
+     pick more informative one (that is nonzero IPA if other is
+     uninitialized, guessed or global0).   */
+
+  if ((dst->count.ipa ().nonzero_p ()
+       || src->count.ipa ().nonzero_p ())
+      && dst->count.ipa ().initialized_p ()
+      && src->count.ipa ().initialized_p ())
+    dst->count = dst->count.ipa () + src->count.ipa ();
+  else if (dst->count.ipa ().initialized_p ())
+    ;
+  else if (src->count.ipa ().initialized_p ())
+    {
+      copy_counts = true;
+      dst->count = src->count.ipa ();
+    }
+
+  /* If no updating needed return early.  */
+  if (dst->count == orig_count)
+    return;
 
   /* First handle functions with no gimple body.  */
   if (dst->thunk.thunk_p || dst->alias
@@ -544,6 +561,16 @@  ipa_merge_profiles (struct cgraph_node *
       struct cgraph_edge *e, *e2;
       basic_block srcbb, dstbb;
 
+      /* Function and global profile may be out of sync.  First scale it same
+	 way as fixup_cfg would.  */
+      profile_count srcnum = src->count;
+      profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
+      bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
+      profile_count dstnum = orig_count;
+      profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
+      bool dstscale = !copy_counts
+		      && dstnum.initialized_p () && !(dstnum == dstden);
+
       /* TODO: merge also statement histograms.  */
       FOR_ALL_BB_FN (srcbb, srccfun)
 	{
@@ -551,15 +578,15 @@  ipa_merge_profiles (struct cgraph_node *
 
 	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 
-	  /* Either sum the profiles if both are IPA and not global0, or
-	     pick more informative one (that is nonzero IPA if other is
-	     uninitialized, guessed or global0).   */
-	  if (!dstbb->count.ipa ().initialized_p ()
-	      || (dstbb->count.ipa () == profile_count::zero ()
-		  && (srcbb->count.ipa ().initialized_p ()
-		      && !(srcbb->count.ipa () == profile_count::zero ()))))
+	  profile_count srccount = srcbb->count;
+	  if (srcscale)
+	    srccount = srccount.apply_scale (srcnum, srcden);
+	  if (dstscale)
+	    dstbb->count = dstbb->count.apply_scale (dstnum, dstden);
+
+	  if (copy_counts)
 	    {
-	      dstbb->count = srcbb->count;
+	      dstbb->count = srccount;
 	      for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 		{
 		  edge srce = EDGE_SUCC (srcbb, i);
@@ -568,18 +595,21 @@  ipa_merge_profiles (struct cgraph_node *
 		    dste->probability = srce->probability;
 		}
 	    }	
-	  else if (srcbb->count.ipa ().initialized_p ()
-		   && !(srcbb->count.ipa () == profile_count::zero ()))
+	  else 
 	    {
 	      for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 		{
 		  edge srce = EDGE_SUCC (srcbb, i);
 		  edge dste = EDGE_SUCC (dstbb, i);
 		  dste->probability = 
-		    dste->probability * dstbb->count.probability_in (dstbb->count + srcbb->count)
-		    + srce->probability * srcbb->count.probability_in (dstbb->count + srcbb->count);
+		    dste->probability * dstbb->count.ipa ().probability_in
+						 (dstbb->count.ipa ()
+						  + srccount.ipa ())
+		    + srce->probability * srcbb->count.ipa ().probability_in
+						 (dstbb->count.ipa ()
+						  + srccount.ipa ());
 		}
-	      dstbb->count += srcbb->count;
+	      dstbb->count = dstbb->count.ipa () + srccount.ipa ();
 	    }
 	}
       push_cfun (dstcfun);