[3/3] Add IEEE 128-bit fp conditional move on PowerPC.

Message ID 20210609002447.GC18854@ibm-toto.the-meissners.org
State New
Headers show
Series
  • Add Power10 IEEE 128-bit min, max, conditional move
Related show

Commit Message

Jason Merrill via Gcc-patches June 9, 2021, 12:24 a.m.
[PATCH 3/3] Add IEEE 128-bit fp conditional move on PowerPC.

This patch adds the support for power10 IEEE 128-bit floating point conditional
move and for automatically generating min/max.

In this patch, I simplified things compared to previous patches.  Instead of
allowing any four of the modes to be used for the conditional move comparison
and the move itself could use different modes, I restricted the conditional
move to just the same mode.  I.e. you can do:

        _Float128 a, b, c, d, e, r;

        r = (a == b) ? c : d;

But you can't do:

        _Float128 c, d, r;
        double a, b;

        r = (a == b) ? c : d;

or:

        _Float128 a, b;
        double c, d, r;

        r = (a == b) ? c : d;

This eliminates a lot of the complexity of the code, because you don't have to
worry about the sizes being different, and the IEEE 128-bit types being
restricted to Altivec registers, while the SF/DF modes can use any VSX
register.

I did not modify the existing support that allowed conditional moves where
SFmode operands are compared and DFmode operands are moved (and vice versa).

Compared to the May 18th patches, this patch replaces the complicated test that
was complained about.

I tested it on 3 platforms:

    *	Power9 little endian, --with-code=power9;
    *	Power8 big endian, --with-code=power8, both 32/64-bit tests done;
    *	Power10 little endian, --with-code=power10.

All systems bootstrapped and there were no new regressions.  I believe I have
addressed the issues with the last patch.

Can I check this into the master branch, and after a soak-in period, back port
it to the GCC 11 branch?

gcc/
2021-06-08 Michael Meissner  <meissner@linux.ibm.com>

        * config/rs6000/rs6000.c (rs6000_maybe_emit_fp_cmove): Add IEEE
	128-bit floating point conditional move support.
	(have_compare_and_set_mask): Add IEEE 128-bit floating point
	types.
	* config/rs6000/rs6000.md (mov<mode>cc, IEEE128 iterator): New insn.
	(mov<mode>cc_p10, IEEE128 iterator): New insn.
	(mov<mode>cc_invert_p10, IEEE128 iterator): New insn.
	(fpmask<mode>, IEEE128 iterator): New insn.
	(xxsel<mode>, IEEE128 iterator): New insn.

gcc/testsuite/
2021-06-08  Michael Meissner  <meissner@linux.ibm.com>

        * gcc.target/powerpc/float128-cmove.c: New test.
        * gcc.target/powerpc/float128-minmax-3.c: New test.
---
 gcc/config/rs6000/rs6000.c                    |  38 ++++++-
 gcc/config/rs6000/rs6000.md                   | 106 ++++++++++++++++++
 .../gcc.target/powerpc/float128-cmove.c       |  58 ++++++++++
 .../gcc.target/powerpc/float128-minmax-3.c    |  15 +++
 4 files changed, 215 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-cmove.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c

-- 
2.31.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.ibm.com, phone: +1 (978) 899-4797

Comments

Jason Merrill via Gcc-patches June 17, 2021, 12:34 a.m. | #1
Ping patch.  In particular, we would like to get this patch into GCC 11.2 for
power10 enablement.

| Date: Tue, 8 Jun 2021 20:24:47 -0400
| Subject: [PATCH 3/3] Add IEEE 128-bit fp conditional move on PowerPC.
| Message-ID: <20210609002447.GC18854@ibm-toto.the-meissners.org>

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.ibm.com, phone: +1 (978) 899-4797

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1651788df6a..411e7539019 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15698,8 +15698,8 @@  rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
   return 1;
 }
 
-/* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or
-   minimum with "C" semantics.
+/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
+   maximum or minimum with "C" semantics.
 
    Unless you use -ffast-math, you can't use these instructions to replace
    conditions that implicitly reverse the condition because the comparison
@@ -15775,6 +15775,7 @@  rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
   enum rtx_code code = GET_CODE (op);
   rtx op0 = XEXP (op, 0);
   rtx op1 = XEXP (op, 1);
+  machine_mode compare_mode = GET_MODE (op0);
   machine_mode result_mode = GET_MODE (dest);
   rtx compare_rtx;
   rtx cmove_rtx;
@@ -15783,6 +15784,35 @@  rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
   if (!can_create_pseudo_p ())
     return 0;
 
+  /* We allow the comparison to be either SFmode/DFmode and the true/false
+     condition to be either SFmode/DFmode.  I.e. we allow:
+
+	float a, b;
+	double c, d, r;
+
+	r = (a == b) ? c : d;
+
+    and:
+
+	double a, b;
+	float c, d, r;
+
+	r = (a == b) ? c : d;
+
+    but we don't allow intermixing the IEEE 128-bit floating point types with
+    the 32/64-bit scalar types.
+
+    It gets too messy where SFmode/DFmode can use any register and TFmode/KFmode
+    can only use Altivec registers.  In addtion, we would need to do a XXPERMDI
+    if we compare SFmode/DFmode and move TFmode/KFmode.  */
+
+  if (compare_mode == result_mode
+      || (compare_mode == SFmode && result_mode == DFmode)
+      || (compare_mode == DFmode && result_mode == SFmode))
+    ;
+  else
+    return false;
+
   switch (code)
     {
     case EQ:
@@ -15835,6 +15865,10 @@  have_compare_and_set_mask (machine_mode mode)
     case E_DFmode:
       return TARGET_P9_MINMAX;
 
+    case E_KFmode:
+    case E_TFmode:
+      return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
+
     default:
       break;
     }
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 064c3a2d9d6..ff87d8c6eaa 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5449,6 +5449,112 @@  (define_insn "*xxsel<mode>"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
+;; Support for ISA 3.1 IEEE 128-bit conditional move.  The mode used in the
+;; comparison must be the same as used in the conditional move.
+(define_expand "mov<mode>cc"
+   [(set (match_operand:IEEE128 0 "gpc_reg_operand")
+	 (if_then_else:IEEE128 (match_operand 1 "comparison_operator")
+			       (match_operand:IEEE128 2 "gpc_reg_operand")
+			       (match_operand:IEEE128 3 "gpc_reg_operand")))]
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+{
+  if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
+(define_insn_and_split "*mov<mode>cc_p10"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v")
+	(if_then_else:IEEE128
+	 (match_operator:CCFP 1 "fpmask_comparison_operator"
+		[(match_operand:IEEE128 2 "altivec_register_operand" "v,v")
+		 (match_operand:IEEE128 3 "altivec_register_operand" "v,v")])
+	 (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
+	 (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
+   (clobber (match_scratch:V2DI 6 "=0,&v"))]
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 6)
+	(if_then_else:V2DI (match_dup 1)
+			   (match_dup 7)
+			   (match_dup 8)))
+   (set (match_dup 0)
+	(if_then_else:IEEE128 (ne (match_dup 6)
+				  (match_dup 8))
+			      (match_dup 4)
+			      (match_dup 5)))]
+{
+  if (GET_CODE (operands[6]) == SCRATCH)
+    operands[6] = gen_reg_rtx (V2DImode);
+
+  operands[7] = CONSTM1_RTX (V2DImode);
+  operands[8] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "length" "8")
+  (set_attr "type" "vecperm")])
+
+;; Handle inverting the fpmask comparisons.
+(define_insn_and_split "*mov<mode>cc_invert_p10"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v")
+	(if_then_else:IEEE128
+	 (match_operator:CCFP 1 "invert_fpmask_comparison_operator"
+		[(match_operand:IEEE128 2 "altivec_register_operand" "v,v")
+		 (match_operand:IEEE128 3 "altivec_register_operand" "v,v")])
+	 (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
+	 (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
+   (clobber (match_scratch:V2DI 6 "=0,&v"))]
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 6)
+	(if_then_else:V2DI (match_dup 9)
+			   (match_dup 7)
+			   (match_dup 8)))
+   (set (match_dup 0)
+	(if_then_else:IEEE128 (ne (match_dup 6)
+				  (match_dup 8))
+			      (match_dup 5)
+			      (match_dup 4)))]
+{
+  rtx op1 = operands[1];
+  enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1));
+
+  if (GET_CODE (operands[6]) == SCRATCH)
+    operands[6] = gen_reg_rtx (V2DImode);
+
+  operands[7] = CONSTM1_RTX (V2DImode);
+  operands[8] = CONST0_RTX (V2DImode);
+
+  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]);
+}
+ [(set_attr "length" "8")
+  (set_attr "type" "vecperm")])
+
+(define_insn "*fpmask<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(if_then_else:V2DI
+	 (match_operator:CCFP 1 "fpmask_comparison_operator"
+		[(match_operand:IEEE128 2 "altivec_register_operand" "v")
+		 (match_operand:IEEE128 3 "altivec_register_operand" "v")])
+	 (match_operand:V2DI 4 "all_ones_constant" "")
+	 (match_operand:V2DI 5 "zero_constant" "")))]
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscmp%V1qp %0,%2,%3"
+  [(set_attr "type" "fpcompare")])
+
+(define_insn "*xxsel<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(if_then_else:IEEE128
+	 (ne (match_operand:V2DI 1 "altivec_register_operand" "v")
+	     (match_operand:V2DI 2 "zero_constant" ""))
+	 (match_operand:IEEE128 3 "altivec_register_operand" "v")
+	 (match_operand:IEEE128 4 "altivec_register_operand" "v")))]
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xxsel %x0,%x4,%x3,%x1"
+  [(set_attr "type" "vecmove")])
+
 
 ;; Conversions to and from floating-point.
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
new file mode 100644
index 00000000000..2fae8dc23bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
@@ -0,0 +1,58 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#ifndef TYPE
+#ifdef __LONG_DOUBLE_IEEE128__
+#define TYPE long double
+
+#else
+#define TYPE _Float128
+#endif
+#endif
+
+/* Verify that the ISA 3.1 (power10) IEEE 128-bit conditional move instructions
+   are generated.  */
+
+TYPE
+eq (TYPE a, TYPE b, TYPE c, TYPE d)
+{
+  return (a == b) ? c : d;
+}
+
+TYPE
+ne (TYPE a, TYPE b, TYPE c, TYPE d)
+{
+  return (a != b) ? c : d;
+}
+
+TYPE
+lt (TYPE a, TYPE b, TYPE c, TYPE d)
+{
+  return (a < b) ? c : d;
+}
+
+TYPE
+le (TYPE a, TYPE b, TYPE c, TYPE d)
+{
+  return (a <= b) ? c : d;
+}
+
+TYPE
+gt (TYPE a, TYPE b, TYPE c, TYPE d)
+{
+  return (a > b) ? c : d;
+}
+
+TYPE
+ge (TYPE a, TYPE b, TYPE c, TYPE d)
+{
+  return (a >= b) ? c : d;
+}
+
+/* { dg-final { scan-assembler-times {\mxscmpeqqp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscmpgeqp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscmpgtqp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxsel\M}     6 } } */
+/* { dg-final { scan-assembler-not   {\mxscmpuqp\M}    } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
new file mode 100644
index 00000000000..6f7627c0f2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
@@ -0,0 +1,15 @@ 
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#ifndef TYPE
+#define TYPE _Float128
+#endif
+
+/* Test that the fminf128/fmaxf128 functions generate if/then/else and not a
+   call.  */
+TYPE f128_min (TYPE a, TYPE b) { return (a < b) ? a : b; }
+TYPE f128_max (TYPE a, TYPE b) { return (b > a) ? b : a; }
+
+/* { dg-final { scan-assembler {\mxsmaxcqp\M} } } */
+/* { dg-final { scan-assembler {\mxsmincqp\M} } } */