middle-end: Parity folding optimizations.

Message ID 005501d64078$f1931610$d4b94230$@nextmovesoftware.com
State New
Headers show
Series
  • middle-end: Parity folding optimizations.
Related show

Commit Message

Roger Sayle June 12, 2020, 5:18 a.m.
This patch implements several constant folding optimizations

for __builtin_parity and friends.  We canonicalize popcount(x)&1

as parity(x) in gimple, and potentially convert back again when

we expand to RTL.  parity(~x) is simplified to parity(x), which

is true for all integer modes with an even number of bits.

But probably most usefully, parity(x)^parity(y) can be simplified

to a parity(x^y), requiring only a single libcall or popcount.

This idiom is seen in PR target/44481 and elsewhere in the Linux

kernel.

 

This patch has been tested with "make bootstrap" and "make -k check" on

x86_64-pc-linux-gnu with no regressions.  If approved, I'd very much

appreciate it if someone could commit this change for me.

 

 

2020-06-12  Roger Sayle  <roger@nextmovesoftware.com>

 

        * match.pd (popcount(x)&1 -> parity(x)): New simplification.

        (parity(~x) -> parity(x)): New simplification.

        (parity(x&1) -> x&1): New simplification.

        (parity(x)^parity(y) -> parity(x^y)): New simplification.

 

2020-06-12  Roger Sayle  <roger@nextmovesoftware.com>

 

        * gcc.dg/fold-parity-1.c: New test.

        * gcc.dg/fold-parity-2.c: Likewise.

        * gcc.dg/fold-parity-3.c: Likewise.

        * gcc.dg/fold-parity-4.c: Likewise.

 

 

Thanks in advance,

Roger

--

Roger Sayle

NextMove Software

Cambridge, UK

/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-original" } */

int foo(unsigned int x)
{
  return __builtin_popcount(x) & 1;
}

int fool(unsigned long x)
{
  return __builtin_popcountl(x) & 1;
}

int fooll(unsigned long long x)
{
  return __builtin_popcountll(x) & 1;
}

/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "original" } } */
/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "original" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

int foo(unsigned int x)
{
  return __builtin_parity(~x);
}

int fool(unsigned long x)
{
  return __builtin_parityl(~x);
}

int fooll(unsigned long long x)
{
  return __builtin_parityll(~x);
}

/* { dg-final { scan-tree-dump-times "~" 0 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

int foo(unsigned int x)
{
  return __builtin_parity(x&1);
}

int fool(unsigned long x)
{
  return __builtin_parityl(x&1);
}

int fooll(unsigned long long x)
{
  return __builtin_parityll(x&1);
}

/* { dg-final { scan-tree-dump-times "__builtin_parity" 0 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

int foo(unsigned int x, unsigned int y)
{
  return __builtin_parity(x) ^ __builtin_parity(y);
}

int fool(unsigned long x, unsigned long y)
{
  return __builtin_parityl(x) ^ __builtin_parityl(y);
}

int fooll(unsigned long long x, unsigned long long y)
{
  return __builtin_parityll(x) ^ __builtin_parityll(y);
}

/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "optimized" } } */

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 2b8c37e..ddb0650 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5949,6 +5949,32 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* Canonicalize POPCOUNT(x)&1 as PARITY(X).  */
+(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL
+               BUILT_IN_POPCOUNTIMAX)
+     parity (BUILT_IN_PARITY BUILT_IN_PARITYL BUILT_IN_PARITYLL
+	     BUILT_IN_PARITYIMAX)
+  (simplify
+    (bit_and (popcount @0) integer_onep)
+    (parity @0)))
+
+/* PARITY simplifications.  */
+(for parity (BUILT_IN_PARITY BUILT_IN_PARITYL BUILT_IN_PARITYLL
+	     BUILT_IN_PARITYIMAX)
+  /* parity(~X) is parity(X).  */
+  (simplify
+    (parity (bit_not @0))
+    (parity @0))
+  /* parity(X&1) is nop_expr(X&1).  */
+  (simplify
+    (parity @0)
+    (if (tree_nonzero_bits (@0) == 1)
+      (convert @0)))
+  /* parity(X)^parity(Y) is parity(X^Y).  */
+  (simplify
+    (bit_xor (parity:s @0) (parity:s @1))
+    (parity (bit_xor @0 @1))))
+
 #if GIMPLE
 /* 64- and 32-bits branchless implementations of popcount are detected: