Fold bswap32(x) != 0 to x != 0 (and related transforms)

Message ID 013801d77bbe$077aa5c0$166ff140$@nextmovesoftware.com
State Superseded
Headers show
Series
  • Fold bswap32(x) != 0 to x != 0 (and related transforms)
Related show

Commit Message

Roger Sayle July 18, 2021, 10:16 a.m.
This patch to match.pd implements several closely related folding
simplifications at the tree-level, that make use of the property
that bit permutation functions, rotate and bswap have inverses.

[1]     bswap(X) eq/ne C, for constant C, simplifies to X eq/ne C'
        where C'=bswap(C), generalizing the transform in the subject.
[2]     bswap(X) eq/ne bswap(Y) simplifies to X eq/ne Y.
[3]     lrotate(X,C1) eq/ne C2 simplifies to X eq/ne C3, where
        C3 = rrotate(C2,C1), i.e. apply the inverse rotation to C2.
[4]     Likewise, rrotate(X,C1) eq/ne C2 simplifies to X eq/ne C3,
        where C3 = lrotate(C2,C1).
[5]     rotate(X,Z) eq/ne rotate(Y,Z) simplifies to X eq/ne Y, when
        the bit-count Z (the same on both sides) has no side-effects.
[6]     rotate(X,Y) eq/ne 0 simplifies to X eq/ne 0 if Y has no
        side-effects.
[7]     Likewise, rotate(X,Y) eq/ne -1 simplifies to X eq/ne -1,
        if Y has no side-effects.

This patch has been tested on x86_64-pc-linux-gnu with a "make
bootstrap" and "make -k check" with no new failures.

Ok for mainline?


2010-07-18  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* match.pd (rotate): Simplify equality/inequality of rotations.
	(bswap): Simplify equality/inequality tests of byte swapping.

gcc/testsuite/ChangeLog
	* gcc.dg/fold-eqrotate-1.c: New test case.
	* gcc.dg/fold-eqbswap-1.c: New test case.

Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

int test1(int x, int y)
{
#if __SIZEOF_INT__ == 4
  return __builtin_bswap32(x) == __builtin_bswap32(y);
#else
  return x == y;
#endif
}

int test2(int x, int y)
{
#if __SIZEOF_INT__ == 4
  return __builtin_bswap32(x) != __builtin_bswap32(y);
#else
  return x != y;
#endif
}

int test3(int x)
{
#if __SIZEOF_INT__ == 4
  return __builtin_bswap32(x) == 12345;
#else
  return x;
#endif
}

int test4(int x)
{
#if __SIZEOF_INT__ == 4
  return __builtin_bswap32(x) != 12345;
#else
  return x;
#endif
}

int test1ll(long long x, long long y)
{
#if __SIZEOF_LONG_LONG__ == 8
  return __builtin_bswap64(x) == __builtin_bswap64(y);
#else
  return x == y;
#endif
}

int test2ll(long long x, long long y)
{
#if __SIZEOF_LONG_LONG__ == 8
  return __builtin_bswap64(x) != __builtin_bswap64(y);
#else
  return x != y;
#endif
}

int test3ll(long long x)
{
#if __SIZEOF_LONG_LONG__ == 8
  return __builtin_bswap64(x) == 12345;
#else
  return (int)x;
#endif
}

int test4ll(long long x)
{
#if __SIZEOF_LONG_LONG__ == 8
  return __builtin_bswap64(x) != 12345;
#else
  return (int)x;
#endif
}

int test1s(short x, short y)
{
#if __SIZEOF_SHORT__ == 2
  return __builtin_bswap16(x) == __builtin_bswap16(y);
#else
  return x == y;
#endif
}

int test2s(short x, short y)
{
#if __SIZEOF_SHORT__ == 2
  return __builtin_bswap16(x) != __builtin_bswap16(y);
#else
  return x != y;
#endif
}

int test3s(short x)
{
#if __SIZEOF_SHORT__ == 2
  return __builtin_bswap16(x) == 12345;
#else
  return (int)x;
#endif
}

int test4s(short x)
{
#if __SIZEOF_SHORT__ == 2
  return __builtin_bswap16(x) != 12345;
#else
  return (int)x;
#endif
}

/* { dg-final { scan-tree-dump-times "__builtin_bswap" 0 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

int test1(unsigned int x, unsigned int y)
{
#if __SIZEOF_INT__ == 4
  unsigned int r1 = (x << 16) | (x >> 16);
  unsigned int r2 = (y << 16) | (y >> 16);
  return r1 == r2;
#else
  return x == y;
#endif
}

int test2(unsigned int x)
{
#if __SIZEOF_INT__ == 4
  unsigned int r1 = (x << 16) | (x >> 16);
  return r1 == 12345;
#else
  return x == 12345;
#endif
}

int test3(unsigned int x)
{
#if __SIZEOF_INT__ == 4
  unsigned int r1 = (x << 16) | (x >> 16);
  return r1 == 0;
#else
  return x == 0;
#endif
}

int test4(unsigned int x)
{
#if __SIZEOF_INT__ == 4
  unsigned int r1 = (x << 16) | (x >> 16);
  return r1 == ~0;
#else
  return x == ~0;
#endif
}

/* { dg-final { scan-tree-dump-times "r>>" 0 "optimized" } } */

Comments

Marc Glisse July 18, 2021, 10:03 p.m. | #1
On Sun, 18 Jul 2021, Roger Sayle wrote:

> +    (if (GIMPLE || !TREE_SIDE_EFFECTS (@0))


I don't think you need to worry about that, the general genmatch machinery 
is already supposed to take care of it. All the existing cases in match.pd 
are about cond_expr, where counting the occurrences of each @i is not 
reliable.

-- 
Marc Glisse
Ian Lance Taylor via Gcc-patches July 20, 2021, 7:36 p.m. | #2
On 7/18/2021 4:03 PM, Marc Glisse wrote:
> On Sun, 18 Jul 2021, Roger Sayle wrote:

>

>> +    (if (GIMPLE || !TREE_SIDE_EFFECTS (@0))

>

> I don't think you need to worry about that, the general genmatch 

> machinery is already supposed to take care of it. All the existing 

> cases in match.pd are about cond_expr, where counting the occurrences 

> of each @i is not reliable.

OK with those tests removed.

Jeff
Ian Lance Taylor via Gcc-patches July 22, 2021, 12:04 p.m. | #3
On Mon, Jul 19, 2021 at 12:03 AM Marc Glisse <marc.glisse@inria.fr> wrote:
>

> On Sun, 18 Jul 2021, Roger Sayle wrote:

>

> > +    (if (GIMPLE || !TREE_SIDE_EFFECTS (@0))

>

> I don't think you need to worry about that, the general genmatch machinery

> is already supposed to take care of it. All the existing cases in match.pd

> are about cond_expr, where counting the occurrences of each @i is not

> reliable.


Yes, genmatch tries to track omitted and duplicated operands and appropriately
preserves side-effects.  When you invoke genmatch with -v then you might see
warnings like "forcing no side-effects on possibly lost leaf" when it was not
possible to see exactly whether a capture was dropped or used multiple times.

Richard.

>

> --

> Marc Glisse

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index beb8d27..aa850bb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3312,6 +3312,25 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      { tree rotate_type = TREE_TYPE (@0); }
       (convert (rotate (convert:rotate_type @1) @2))))))
 
+(for cmp (eq ne)
+ (for rotate (lrotate rrotate)
+      invrot (rrotate lrotate)
+  /* (X >>r Y) cmp (Z >>r Y) may simplify to X cmp Y. */
+  (simplify
+   (cmp (rotate @1 @0) (rotate @2 @0))
+    (if (GIMPLE || !TREE_SIDE_EFFECTS (@0))
+     (cmp @1 @2)))
+  /* (X >>r C1) cmp C2 may simplify to X cmp C3. */
+  (simplify
+   (cmp (rotate @0 INTEGER_CST@1) INTEGER_CST@2)
+   (cmp @0 { const_binop (invrot, TREE_TYPE (@0), @2, @1); }))
+  /* (X >>r Y) cmp C where C is 0 or ~0, may simplify to X cmp C.  */
+  (simplify
+   (cmp (rotate @0 @1) INTEGER_CST@2)
+    (if ((GIMPLE || !TREE_SIDE_EFFECTS (@1))
+	 && (integer_zerop (@2) || integer_all_onesp (@2)))
+     (cmp @0 @2)))))
+
 /* Simplifications of conversions.  */
 
 /* Basic strip-useless-type-conversions / strip_nops.  */
@@ -3622,6 +3641,13 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (simplify
    (bswap (bitop:c (bswap @0) @1))
    (bitop @0 (bswap @1))))
+ (for cmp (eq ne)
+  (simplify
+   (cmp (bswap @0) (bswap @1))
+   (cmp @0 @1))
+  (simplify
+   (cmp (bswap @0) INTEGER_CST@1)
+   (cmp @0 (bswap @1))))
  /* (bswap(x) >> C1) & C2 can sometimes be simplified to (x >> C3) & C2.  */
  (simplify
   (bit_and (convert1? (rshift@0 (convert2? (bswap@4 @1)) INTEGER_CST@2))