x86: Convert load from constand pool to SSE constant load

Message ID 20210722123139.1529202-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x86: Convert load from constand pool to SSE constant load
Related show

Commit Message

Ian Lance Taylor via Gcc-patches July 22, 2021, 12:31 p.m.
For

(set (reg:V32QI 108)
        (const_vector:V32QI [
                (const_int -1 [0xffffffffffffffff]) repeated x32
            ])) "x.c":15:5 1573 {movv32qi_internal}
     (expr_list:REG_EQUIV (const_vector:V32QI [
                (const_int -1 [0xffffffffffffffff]) repeated x32
            ])
        (nil))

LRA may generate

 #6  0x0000000001077c8a in lra_emit_move (x=0x7ffff7f81b58, y=0x7ffff7f81b40)
    at /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:502
502		      ? emit_move_insn (x, y) : emit_insn (gen_rtx_SET (x, y)));
(gdb) call debug_rtx (x)
(reg:V32QI 326)
(gdb) call debug_rtx (y)
(mem/u/c:V32QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S32 A256])
(gdb)

Convert load from constand pool to SSE constant load:

(set (reg:V32QI 24 xmm4 [327])
        (const_vector:V32QI [
                (const_int -1 [0xffffffffffffffff]) repeated x32
            ])) "x.c":15:5 1573 {movv32qi_internal}

gcc/

	PR target/101504
	* config/i386/i386-expand.c (ix86_broadcast_from_constant):
	Return the standard SSE constant.
	(ix86_expand_vector_move): Handle standard SSE constants.

gcc/testsuite/

	PR target/101504
	* gcc.target/i386/pr101504.c: New test.
---
 gcc/config/i386/i386-expand.c            | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr101504.c | 23 +++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101504.c

-- 
2.31.1

Comments

Ian Lance Taylor via Gcc-patches July 23, 2021, 2:23 a.m. | #1
On Thu, Jul 22, 2021 at 8:34 PM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>

> For

>

> (set (reg:V32QI 108)

>         (const_vector:V32QI [

>                 (const_int -1 [0xffffffffffffffff]) repeated x32

>             ])) "x.c":15:5 1573 {movv32qi_internal}

>      (expr_list:REG_EQUIV (const_vector:V32QI [

>                 (const_int -1 [0xffffffffffffffff]) repeated x32

>             ])

>         (nil))

>

> LRA may generate

LRA will call ix86_expand_vector_move, and using
ix86_gen_scratch_sse_rtx there may result in no hard reg splits and
cause ICE.
I'm afraid this patch is just a walk round, the real issue here should
be ix86_gen_scratch_sse_rtx usage when lra_in_progress.
>

>  #6  0x0000000001077c8a in lra_emit_move (x=0x7ffff7f81b58, y=0x7ffff7f81b40)

>     at /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:502

> 502                   ? emit_move_insn (x, y) : emit_insn (gen_rtx_SET (x, y)));

> (gdb) call debug_rtx (x)

> (reg:V32QI 326)

> (gdb) call debug_rtx (y)

> (mem/u/c:V32QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S32 A256])

> (gdb)

>

> Convert load from constand pool to SSE constant load:

>

> (set (reg:V32QI 24 xmm4 [327])

>         (const_vector:V32QI [

>                 (const_int -1 [0xffffffffffffffff]) repeated x32

>             ])) "x.c":15:5 1573 {movv32qi_internal}

>

> gcc/

>

>         PR target/101504

>         * config/i386/i386-expand.c (ix86_broadcast_from_constant):

>         Return the standard SSE constant.

>         (ix86_expand_vector_move): Handle standard SSE constants.

>

> gcc/testsuite/

>

>         PR target/101504

>         * gcc.target/i386/pr101504.c: New test.

> ---

>  gcc/config/i386/i386-expand.c            | 11 +++++++++++

>  gcc/testsuite/gcc.target/i386/pr101504.c | 23 +++++++++++++++++++++++

>  2 files changed, 34 insertions(+)

>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101504.c

>

> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

> index 896bd685b1f..7b6055bbf83 100644

> --- a/gcc/config/i386/i386-expand.c

> +++ b/gcc/config/i386/i386-expand.c

> @@ -493,6 +493,10 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)

>    if (GET_CODE (constant) != CONST_VECTOR)

>      return nullptr;

>

> +  /* Return the standard SSE constant.  */

> +  if (standard_sse_constant_p (constant, mode))

> +    return constant;

> +

>    /* There could be some rtx like

>       (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1")))

>       but with "*.LC1" refer to V2DI constant vector.  */

> @@ -577,6 +581,13 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])

>        rtx first = ix86_broadcast_from_constant (mode, op1);

>        if (first != nullptr)

>         {

> +         if (GET_CODE (first) == CONST_VECTOR)

> +           {

> +             /* This is a standard SSE constant.  */

> +             emit_move_insn (op0, first);

> +             return;

> +           }

> +

>           /* Broadcast to XMM/YMM/ZMM register from an integer

>              constant or scalar mem.  */

>           /* Hard registers are used for 2 purposes:

> diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c

> new file mode 100644

> index 00000000000..2ad0405dd7b

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/pr101504.c

> @@ -0,0 +1,23 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -march=skylake" } */

> +

> +typedef unsigned int __attribute__((__vector_size__ (32))) U;

> +typedef unsigned char __attribute__((__vector_size__ (64))) V;

> +

> +V g;

> +

> +U

> +foo (void)

> +{

> +  V v = __builtin_shufflevector (g, g,

> +                                0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2,

> +                                2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2,

> +                                0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2,

> +                                3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0);

> +  v ^= 255;

> +  V w = v + g;

> +  U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1];

> +  return u;

> +}

> +

> +/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */

> --

> 2.31.1

>



-- 
BR,
Hongtao
Ian Lance Taylor via Gcc-patches July 23, 2021, 2:42 a.m. | #2
On Thu, Jul 22, 2021 at 7:17 PM Hongtao Liu <crazylht@gmail.com> wrote:
>

> On Thu, Jul 22, 2021 at 8:34 PM H.J. Lu via Gcc-patches

> <gcc-patches@gcc.gnu.org> wrote:

> >

> > For

> >

> > (set (reg:V32QI 108)

> >         (const_vector:V32QI [

> >                 (const_int -1 [0xffffffffffffffff]) repeated x32

> >             ])) "x.c":15:5 1573 {movv32qi_internal}

> >      (expr_list:REG_EQUIV (const_vector:V32QI [

> >                 (const_int -1 [0xffffffffffffffff]) repeated x32

> >             ])

> >         (nil))

> >

> > LRA may generate

> LRA will call ix86_expand_vector_move, and using

> ix86_gen_scratch_sse_rtx there may result in no hard reg splits and

> cause ICE.

> I'm afraid this patch is just a walk round, the real issue here should

> be ix86_gen_scratch_sse_rtx usage when lra_in_progress.


Fixed.  I sent the v2 patch.

Thanks.

> >

> >  #6  0x0000000001077c8a in lra_emit_move (x=0x7ffff7f81b58, y=0x7ffff7f81b40)

> >     at /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:502

> > 502                   ? emit_move_insn (x, y) : emit_insn (gen_rtx_SET (x, y)));

> > (gdb) call debug_rtx (x)

> > (reg:V32QI 326)

> > (gdb) call debug_rtx (y)

> > (mem/u/c:V32QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S32 A256])

> > (gdb)

> >

> > Convert load from constand pool to SSE constant load:

> >

> > (set (reg:V32QI 24 xmm4 [327])

> >         (const_vector:V32QI [

> >                 (const_int -1 [0xffffffffffffffff]) repeated x32

> >             ])) "x.c":15:5 1573 {movv32qi_internal}

> >

> > gcc/

> >

> >         PR target/101504

> >         * config/i386/i386-expand.c (ix86_broadcast_from_constant):

> >         Return the standard SSE constant.

> >         (ix86_expand_vector_move): Handle standard SSE constants.

> >

> > gcc/testsuite/

> >

> >         PR target/101504

> >         * gcc.target/i386/pr101504.c: New test.

> > ---

> >  gcc/config/i386/i386-expand.c            | 11 +++++++++++

> >  gcc/testsuite/gcc.target/i386/pr101504.c | 23 +++++++++++++++++++++++

> >  2 files changed, 34 insertions(+)

> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101504.c

> >

> > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

> > index 896bd685b1f..7b6055bbf83 100644

> > --- a/gcc/config/i386/i386-expand.c

> > +++ b/gcc/config/i386/i386-expand.c

> > @@ -493,6 +493,10 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)

> >    if (GET_CODE (constant) != CONST_VECTOR)

> >      return nullptr;

> >

> > +  /* Return the standard SSE constant.  */

> > +  if (standard_sse_constant_p (constant, mode))

> > +    return constant;

> > +

> >    /* There could be some rtx like

> >       (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1")))

> >       but with "*.LC1" refer to V2DI constant vector.  */

> > @@ -577,6 +581,13 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])

> >        rtx first = ix86_broadcast_from_constant (mode, op1);

> >        if (first != nullptr)

> >         {

> > +         if (GET_CODE (first) == CONST_VECTOR)

> > +           {

> > +             /* This is a standard SSE constant.  */

> > +             emit_move_insn (op0, first);

> > +             return;

> > +           }

> > +

> >           /* Broadcast to XMM/YMM/ZMM register from an integer

> >              constant or scalar mem.  */

> >           /* Hard registers are used for 2 purposes:

> > diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c

> > new file mode 100644

> > index 00000000000..2ad0405dd7b

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/pr101504.c

> > @@ -0,0 +1,23 @@

> > +/* { dg-do compile } */

> > +/* { dg-options "-O2 -march=skylake" } */

> > +

> > +typedef unsigned int __attribute__((__vector_size__ (32))) U;

> > +typedef unsigned char __attribute__((__vector_size__ (64))) V;

> > +

> > +V g;

> > +

> > +U

> > +foo (void)

> > +{

> > +  V v = __builtin_shufflevector (g, g,

> > +                                0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2,

> > +                                2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2,

> > +                                0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2,

> > +                                3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0);

> > +  v ^= 255;

> > +  V w = v + g;

> > +  U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1];

> > +  return u;

> > +}

> > +

> > +/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */

> > --

> > 2.31.1

> >

>

>

> --

> BR,

> Hongtao




-- 
H.J.

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 896bd685b1f..7b6055bbf83 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -493,6 +493,10 @@  ix86_broadcast_from_constant (machine_mode mode, rtx op)
   if (GET_CODE (constant) != CONST_VECTOR)
     return nullptr;
 
+  /* Return the standard SSE constant.  */
+  if (standard_sse_constant_p (constant, mode))
+    return constant;
+
   /* There could be some rtx like
      (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1")))
      but with "*.LC1" refer to V2DI constant vector.  */
@@ -577,6 +581,13 @@  ix86_expand_vector_move (machine_mode mode, rtx operands[])
       rtx first = ix86_broadcast_from_constant (mode, op1);
       if (first != nullptr)
 	{
+	  if (GET_CODE (first) == CONST_VECTOR)
+	    {
+	      /* This is a standard SSE constant.  */
+	      emit_move_insn (op0, first);
+	      return;
+	    }
+
 	  /* Broadcast to XMM/YMM/ZMM register from an integer
 	     constant or scalar mem.  */
 	  /* Hard registers are used for 2 purposes:
diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c
new file mode 100644
index 00000000000..2ad0405dd7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101504.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake" } */
+
+typedef unsigned int __attribute__((__vector_size__ (32))) U;
+typedef unsigned char __attribute__((__vector_size__ (64))) V;
+
+V g;
+
+U
+foo (void)
+{
+  V v = __builtin_shufflevector (g, g,
+				 0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2,
+				 2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2,
+				 0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2,
+				 3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0);
+  v ^= 255;
+  V w = v + g;
+  U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1];
+  return u;
+}
+
+/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */