[i386] Remove copysign post_reload splitter for scalar modes.

Message ID 20210909075420.2442868-1-hongtao.liu@intel.com
State New
Headers show
Series
  • [i386] Remove copysign post_reload splitter for scalar modes.
Related show

Commit Message

Harald Anlauf via Gcc-patches Sept. 9, 2021, 7:54 a.m.
Hi:
  As a follow-up to [1], this patch removes all scalar-mode copysign-related
post_reload splitters/define_insns and expands copysign directly into the
sequence below, using paradoxical subregs.

  op3 = op1 & ~mask;
  op4 = op2 & mask;
  dest = op3 | op4;

This can sometimes generate better code, as avx512dq-abs-copysign-1.c
shows.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.

gcc/ChangeLog:

	* config/i386/i386-expand.c (ix86_expand_copysign): Expand
	right into ANDNOT + AND + IOR, using paradoxical subregs.
	(ix86_split_copysign_const): Remove.
	(ix86_split_copysign_var): Ditto.
	* config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.
	(ix86_split_copysign_var): Ditto.
	* config/i386/i386.md (@copysign<mode>3_const): Ditto.
	(@copysign<mode>3_var): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.
	* gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.
---
 gcc/config/i386/i386-expand.c                 | 152 +++---------------
 gcc/config/i386/i386-protos.h                 |   2 -
 gcc/config/i386/i386.md                       |  44 -----
 .../gcc.target/i386/avx512dq-abs-copysign-1.c |   4 +-
 .../gcc.target/i386/avx512vl-abs-copysign-1.c |   4 +-
 5 files changed, 30 insertions(+), 176 deletions(-)

-- 
2.27.0

Comments

Harald Anlauf via Gcc-patches Sept. 9, 2021, 8 a.m. | #1
On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao.liu@intel.com> wrote:
>

> Hi:

>   As a follow up of [1], the patch removes all scalar mode copysign related

> post_reload splitter/define_insn and expand copysign directly into below using

> paradoxical subregs.


[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html
>

>   op3 = op1 & ~mask;

>   op4 = op2 & mask;

>   dest = op3 | op4;

>

> It can sometimes generate better code just like avx512dq-abs-copysign-1.c

> shows.

>

>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.

>

> gcc/ChangeLog:

>

>         * config/i386/i386-expand.c (ix86_expand_copysign): Expand

>         right into ANDNOT + AND + IOR, using paradoxical subregs.

>         (ix86_split_copysign_const): Remove.

>         (ix86_split_copysign_var): Ditto.

>         * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.

>         (ix86_split_copysign_var): Ditto.

>         * config/i386/i386.md (@copysign<mode>3_const): Ditto.

>         (@copysign<mode>3_var): Ditto.

>

> gcc/testsuite/ChangeLog:

>

>         * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.

>         * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.

> ---

>  gcc/config/i386/i386-expand.c                 | 152 +++---------------

>  gcc/config/i386/i386-protos.h                 |   2 -

>  gcc/config/i386/i386.md                       |  44 -----

>  .../gcc.target/i386/avx512dq-abs-copysign-1.c |   4 +-

>  .../gcc.target/i386/avx512vl-abs-copysign-1.c |   4 +-

>  5 files changed, 30 insertions(+), 176 deletions(-)

>

> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

> index badbacc19d8..a0262a8f47d 100644

> --- a/gcc/config/i386/i386-expand.c

> +++ b/gcc/config/i386/i386-expand.c

> @@ -2115,13 +2115,9 @@ void

>  ix86_expand_copysign (rtx operands[])

>  {

>    machine_mode mode, vmode;

> -  rtx dest, op0, op1, mask;

> +  rtx dest, op0, op1, mask, op2, op3;

>

> -  dest = operands[0];

> -  op0 = operands[1];

> -  op1 = operands[2];

> -

> -  mode = GET_MODE (dest);

> +  mode = GET_MODE (operands[0]);

>

>    if (mode == SFmode)

>      vmode = V4SFmode;

> @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[])

>    else

>      gcc_unreachable ();

>

> -  mask = ix86_build_signbit_mask (vmode, 0, 0);

> -

> -  if (CONST_DOUBLE_P (op0))

> +  if (rtx_equal_p (operands[1], operands[2]))

>      {

> -      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))

> -       op0 = simplify_unary_operation (ABS, mode, op0, mode);

> -

> -      if (mode == SFmode || mode == DFmode)

> -       {

> -         if (op0 == CONST0_RTX (mode))

> -           op0 = CONST0_RTX (vmode);

> -         else

> -           {

> -             rtx v = ix86_build_const_vector (vmode, false, op0);

> -

> -             op0 = force_reg (vmode, v);

> -           }

> -       }

> -      else if (op0 != CONST0_RTX (mode))

> -       op0 = force_reg (mode, op0);

> -

> -      emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));

> -    }

> -  else

> -    {

> -      rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);

> -

> -      emit_insn (gen_copysign3_var

> -                (mode, dest, NULL_RTX, op0, op1, nmask, mask));

> -    }

> -}

> -

> -/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to

> -   be a constant, and so has already been expanded into a vector constant.  */

> -

> -void

> -ix86_split_copysign_const (rtx operands[])

> -{

> -  machine_mode mode, vmode;

> -  rtx dest, op0, mask, x;

> -

> -  dest = operands[0];

> -  op0 = operands[1];

> -  mask = operands[3];

> -

> -  mode = GET_MODE (dest);

> -  vmode = GET_MODE (mask);

> -

> -  dest = lowpart_subreg (vmode, dest, mode);

> -  x = gen_rtx_AND (vmode, dest, mask);

> -  emit_insn (gen_rtx_SET (dest, x));

> -

> -  if (op0 != CONST0_RTX (vmode))

> -    {

> -      x = gen_rtx_IOR (vmode, dest, op0);

> -      emit_insn (gen_rtx_SET (dest, x));

> -    }

> -}

> -

> -/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,

> -   so we have to do two masks.  */

> -

> -void

> -ix86_split_copysign_var (rtx operands[])

> -{

> -  machine_mode mode, vmode;

> -  rtx dest, scratch, op0, op1, mask, nmask, x;

> -

> -  dest = operands[0];

> -  scratch = operands[1];

> -  op0 = operands[2];

> -  op1 = operands[3];

> -  nmask = operands[4];

> -  mask = operands[5];

> -

> -  mode = GET_MODE (dest);

> -  vmode = GET_MODE (mask);

> -

> -  if (rtx_equal_p (op0, op1))

> -    {

> -      /* Shouldn't happen often (it's useless, obviously), but when it does

> -        we'd generate incorrect code if we continue below.  */

> -      emit_move_insn (dest, op0);

> +      emit_move_insn (operands[0], operands[1]);

>        return;

>      }

>

> -  if (REG_P (mask) && REGNO (dest) == REGNO (mask))    /* alternative 0 */

> -    {

> -      gcc_assert (REGNO (op1) == REGNO (scratch));

> -

> -      x = gen_rtx_AND (vmode, scratch, mask);

> -      emit_insn (gen_rtx_SET (scratch, x));

> +  dest = lowpart_subreg (vmode, operands[0], mode);

> +  op1 = lowpart_subreg (vmode, operands[2], mode);

> +  mask = ix86_build_signbit_mask (vmode, 0, 0);

>

> -      dest = mask;

> -      op0 = lowpart_subreg (vmode, op0, mode);

> -      x = gen_rtx_NOT (vmode, dest);

> -      x = gen_rtx_AND (vmode, x, op0);

> -      emit_insn (gen_rtx_SET (dest, x));

> -    }

> -  else

> +  if (CONST_DOUBLE_P (operands[1]))

>      {

> -      if (REGNO (op1) == REGNO (scratch))              /* alternative 1,3 */

> -       {

> -         x = gen_rtx_AND (vmode, scratch, mask);

> -       }

> -      else                                             /* alternative 2,4 */

> +      op0 = simplify_unary_operation (ABS, mode, operands[1], mode);

> +      /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a.  */

> +      if (op0 == CONST0_RTX (mode))

>         {

> -          gcc_assert (REGNO (mask) == REGNO (scratch));

> -          op1 = lowpart_subreg (vmode, op1, mode);

> -         x = gen_rtx_AND (vmode, scratch, op1);

> +         emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));

> +         return;

>         }

> -      emit_insn (gen_rtx_SET (scratch, x));

>

> -      if (REGNO (op0) == REGNO (dest))                 /* alternative 1,2 */

> -       {

> -         dest = lowpart_subreg (vmode, op0, mode);

> -         x = gen_rtx_AND (vmode, dest, nmask);

> -       }

> -      else                                             /* alternative 3,4 */

> -       {

> -          gcc_assert (REGNO (nmask) == REGNO (dest));

> -         dest = nmask;

> -         op0 = lowpart_subreg (vmode, op0, mode);

> -         x = gen_rtx_AND (vmode, dest, op0);

> -       }

> -      emit_insn (gen_rtx_SET (dest, x));

> +      if (GET_MODE_SIZE (mode) < 16)

> +       op0 = ix86_build_const_vector (vmode, false, op0);

> +      op0 = force_reg (vmode, op0);

>      }

> -

> -  x = gen_rtx_IOR (vmode, dest, scratch);

> -  emit_insn (gen_rtx_SET (dest, x));

> +  else

> +    op0 = lowpart_subreg (vmode, operands[1], mode);

> +

> +  op2 = gen_reg_rtx (vmode);

> +  op3 = gen_reg_rtx (vmode);

> +  emit_move_insn (op2, gen_rtx_AND (vmode,

> +                                   gen_rtx_NOT (vmode, mask),

> +                                   op0));

> +  emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));

> +  emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));

>  }

>

>  /* Expand an xorsign operation.  */

> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h

> index 72644e33a92..dcae34b915e 100644

> --- a/gcc/config/i386/i386-protos.h

> +++ b/gcc/config/i386/i386-protos.h

> @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum rtx_code, machine_mode,

>  extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,

>                                            rtx[]);

>  extern void ix86_expand_copysign (rtx []);

> -extern void ix86_split_copysign_const (rtx []);

> -extern void ix86_split_copysign_var (rtx []);

>  extern void ix86_expand_xorsign (rtx []);

>  extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);

>  extern bool ix86_match_ccmode (rtx, machine_mode);

> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

> index 6b4ceb2bce3..ba0058dad81 100644

> --- a/gcc/config/i386/i386.md

> +++ b/gcc/config/i386/i386.md

> @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3"

>     || (TARGET_SSE && (<MODE>mode == TFmode))"

>    "ix86_expand_copysign (operands); DONE;")

>

> -(define_insn_and_split "@copysign<mode>3_const"

> -  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")

> -       (unspec:SSEMODEF

> -         [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")

> -          (match_operand:SSEMODEF 2 "register_operand" "0")

> -          (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]

> -         UNSPEC_COPYSIGN))]

> -  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)

> -   || (TARGET_SSE && (<MODE>mode == TFmode))"

> -  "#"

> -  "&& reload_completed"

> -  [(const_int 0)]

> -  "ix86_split_copysign_const (operands); DONE;")

> -

> -(define_insn "@copysign<mode>3_var"

> -  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")

> -       (unspec:SSEMODEF

> -         [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")

> -          (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")

> -          (match_operand:<ssevecmodef> 4

> -            "nonimmediate_operand" "X,Yvm,Yvm,0,0")

> -          (match_operand:<ssevecmodef> 5

> -            "nonimmediate_operand" "0,Yvm,1,Yvm,1")]

> -         UNSPEC_COPYSIGN))

> -   (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]

> -  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)

> -   || (TARGET_SSE && (<MODE>mode == TFmode))"

> -  "#")

> -

> -(define_split

> -  [(set (match_operand:SSEMODEF 0 "register_operand")

> -       (unspec:SSEMODEF

> -         [(match_operand:SSEMODEF 2 "register_operand")

> -          (match_operand:SSEMODEF 3 "register_operand")

> -          (match_operand:<ssevecmodef> 4)

> -          (match_operand:<ssevecmodef> 5)]

> -         UNSPEC_COPYSIGN))

> -   (clobber (match_scratch:<ssevecmodef> 1))]

> -  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)

> -    || (TARGET_SSE && (<MODE>mode == TFmode)))

> -   && reload_completed"

> -  [(const_int 0)]

> -  "ix86_split_copysign_var (operands); DONE;")

> -

>  (define_expand "xorsign<mode>3"

>    [(match_operand:MODEF 0 "register_operand")

>     (match_operand:MODEF 1 "register_operand")

> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c

> index cb542d09058..0107df7741a 100644

> --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c

> +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c

> @@ -64,8 +64,8 @@ f6 (double x)

>  }

>

>  /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */

> -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */

> +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */

>  /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */

>  /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */

> -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */

> +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */

>  /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */

> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c

> index b375c5fad80..b27335b9d99 100644

> --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c

> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c

> @@ -64,8 +64,8 @@ f6 (double x)

>  }

>

>  /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */

> -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */

> +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */

>  /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */

>  /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */

> -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */

> +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */

>  /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */

> --

> 2.27.0

>



-- 
BR,
Hongtao
Harald Anlauf via Gcc-patches Sept. 10, 2021, 5:04 a.m. | #2
On Thu, Sep 9, 2021 at 4:00 PM Hongtao Liu <crazylht@gmail.com> wrote:
>

> On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao.liu@intel.com> wrote:

> >

> > Hi:

> >   As a follow up of [1], the patch removes all scalar mode copysign related

> > post_reload splitter/define_insn and expand copysign directly into below using

> > paradoxical subregs.

>

> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html

> >

> >   op3 = op1 & ~mask;

> >   op4 = op2 & mask;

> >   dest = op3 | op4;

> >

> > It can sometimes generate better code just like avx512dq-abs-copysign-1.c

> > shows.

> >

> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.

> >

Committed.
> > gcc/ChangeLog:

> >

> >         * config/i386/i386-expand.c (ix86_expand_copysign): Expand

> >         right into ANDNOT + AND + IOR, using paradoxical subregs.

> >         (ix86_split_copysign_const): Remove.

> >         (ix86_split_copysign_var): Ditto.

> >         * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.

> >         (ix86_split_copysign_var): Ditto.

> >         * config/i386/i386.md (@copysign<mode>3_const): Ditto.

> >         (@copysign<mode>3_var): Ditto.

> >

> > gcc/testsuite/ChangeLog:

> >

> >         * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.

> >         * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.

> > ---

> >  gcc/config/i386/i386-expand.c                 | 152 +++---------------

> >  gcc/config/i386/i386-protos.h                 |   2 -

> >  gcc/config/i386/i386.md                       |  44 -----

> >  .../gcc.target/i386/avx512dq-abs-copysign-1.c |   4 +-

> >  .../gcc.target/i386/avx512vl-abs-copysign-1.c |   4 +-

> >  5 files changed, 30 insertions(+), 176 deletions(-)

> >

> > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

> > index badbacc19d8..a0262a8f47d 100644

> > --- a/gcc/config/i386/i386-expand.c

> > +++ b/gcc/config/i386/i386-expand.c

> > @@ -2115,13 +2115,9 @@ void

> >  ix86_expand_copysign (rtx operands[])

> >  {

> >    machine_mode mode, vmode;

> > -  rtx dest, op0, op1, mask;

> > +  rtx dest, op0, op1, mask, op2, op3;

> >

> > -  dest = operands[0];

> > -  op0 = operands[1];

> > -  op1 = operands[2];

> > -

> > -  mode = GET_MODE (dest);

> > +  mode = GET_MODE (operands[0]);

> >

> >    if (mode == SFmode)

> >      vmode = V4SFmode;

> > @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[])

> >    else

> >      gcc_unreachable ();

> >

> > -  mask = ix86_build_signbit_mask (vmode, 0, 0);

> > -

> > -  if (CONST_DOUBLE_P (op0))

> > +  if (rtx_equal_p (operands[1], operands[2]))

> >      {

> > -      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))

> > -       op0 = simplify_unary_operation (ABS, mode, op0, mode);

> > -

> > -      if (mode == SFmode || mode == DFmode)

> > -       {

> > -         if (op0 == CONST0_RTX (mode))

> > -           op0 = CONST0_RTX (vmode);

> > -         else

> > -           {

> > -             rtx v = ix86_build_const_vector (vmode, false, op0);

> > -

> > -             op0 = force_reg (vmode, v);

> > -           }

> > -       }

> > -      else if (op0 != CONST0_RTX (mode))

> > -       op0 = force_reg (mode, op0);

> > -

> > -      emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));

> > -    }

> > -  else

> > -    {

> > -      rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);

> > -

> > -      emit_insn (gen_copysign3_var

> > -                (mode, dest, NULL_RTX, op0, op1, nmask, mask));

> > -    }

> > -}

> > -

> > -/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to

> > -   be a constant, and so has already been expanded into a vector constant.  */

> > -

> > -void

> > -ix86_split_copysign_const (rtx operands[])

> > -{

> > -  machine_mode mode, vmode;

> > -  rtx dest, op0, mask, x;

> > -

> > -  dest = operands[0];

> > -  op0 = operands[1];

> > -  mask = operands[3];

> > -

> > -  mode = GET_MODE (dest);

> > -  vmode = GET_MODE (mask);

> > -

> > -  dest = lowpart_subreg (vmode, dest, mode);

> > -  x = gen_rtx_AND (vmode, dest, mask);

> > -  emit_insn (gen_rtx_SET (dest, x));

> > -

> > -  if (op0 != CONST0_RTX (vmode))

> > -    {

> > -      x = gen_rtx_IOR (vmode, dest, op0);

> > -      emit_insn (gen_rtx_SET (dest, x));

> > -    }

> > -}

> > -

> > -/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,

> > -   so we have to do two masks.  */

> > -

> > -void

> > -ix86_split_copysign_var (rtx operands[])

> > -{

> > -  machine_mode mode, vmode;

> > -  rtx dest, scratch, op0, op1, mask, nmask, x;

> > -

> > -  dest = operands[0];

> > -  scratch = operands[1];

> > -  op0 = operands[2];

> > -  op1 = operands[3];

> > -  nmask = operands[4];

> > -  mask = operands[5];

> > -

> > -  mode = GET_MODE (dest);

> > -  vmode = GET_MODE (mask);

> > -

> > -  if (rtx_equal_p (op0, op1))

> > -    {

> > -      /* Shouldn't happen often (it's useless, obviously), but when it does

> > -        we'd generate incorrect code if we continue below.  */

> > -      emit_move_insn (dest, op0);

> > +      emit_move_insn (operands[0], operands[1]);

> >        return;

> >      }

> >

> > -  if (REG_P (mask) && REGNO (dest) == REGNO (mask))    /* alternative 0 */

> > -    {

> > -      gcc_assert (REGNO (op1) == REGNO (scratch));

> > -

> > -      x = gen_rtx_AND (vmode, scratch, mask);

> > -      emit_insn (gen_rtx_SET (scratch, x));

> > +  dest = lowpart_subreg (vmode, operands[0], mode);

> > +  op1 = lowpart_subreg (vmode, operands[2], mode);

> > +  mask = ix86_build_signbit_mask (vmode, 0, 0);

> >

> > -      dest = mask;

> > -      op0 = lowpart_subreg (vmode, op0, mode);

> > -      x = gen_rtx_NOT (vmode, dest);

> > -      x = gen_rtx_AND (vmode, x, op0);

> > -      emit_insn (gen_rtx_SET (dest, x));

> > -    }

> > -  else

> > +  if (CONST_DOUBLE_P (operands[1]))

> >      {

> > -      if (REGNO (op1) == REGNO (scratch))              /* alternative 1,3 */

> > -       {

> > -         x = gen_rtx_AND (vmode, scratch, mask);

> > -       }

> > -      else                                             /* alternative 2,4 */

> > +      op0 = simplify_unary_operation (ABS, mode, operands[1], mode);

> > +      /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a.  */

> > +      if (op0 == CONST0_RTX (mode))

> >         {

> > -          gcc_assert (REGNO (mask) == REGNO (scratch));

> > -          op1 = lowpart_subreg (vmode, op1, mode);

> > -         x = gen_rtx_AND (vmode, scratch, op1);

> > +         emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));

> > +         return;

> >         }

> > -      emit_insn (gen_rtx_SET (scratch, x));

> >

> > -      if (REGNO (op0) == REGNO (dest))                 /* alternative 1,2 */

> > -       {

> > -         dest = lowpart_subreg (vmode, op0, mode);

> > -         x = gen_rtx_AND (vmode, dest, nmask);

> > -       }

> > -      else                                             /* alternative 3,4 */

> > -       {

> > -          gcc_assert (REGNO (nmask) == REGNO (dest));

> > -         dest = nmask;

> > -         op0 = lowpart_subreg (vmode, op0, mode);

> > -         x = gen_rtx_AND (vmode, dest, op0);

> > -       }

> > -      emit_insn (gen_rtx_SET (dest, x));

> > +      if (GET_MODE_SIZE (mode) < 16)

> > +       op0 = ix86_build_const_vector (vmode, false, op0);

> > +      op0 = force_reg (vmode, op0);

> >      }

> > -

> > -  x = gen_rtx_IOR (vmode, dest, scratch);

> > -  emit_insn (gen_rtx_SET (dest, x));

> > +  else

> > +    op0 = lowpart_subreg (vmode, operands[1], mode);

> > +

> > +  op2 = gen_reg_rtx (vmode);

> > +  op3 = gen_reg_rtx (vmode);

> > +  emit_move_insn (op2, gen_rtx_AND (vmode,

> > +                                   gen_rtx_NOT (vmode, mask),

> > +                                   op0));

> > +  emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));

> > +  emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));

> >  }

> >

> >  /* Expand an xorsign operation.  */

> > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h

> > index 72644e33a92..dcae34b915e 100644

> > --- a/gcc/config/i386/i386-protos.h

> > +++ b/gcc/config/i386/i386-protos.h

> > @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum rtx_code, machine_mode,

> >  extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,

> >                                            rtx[]);

> >  extern void ix86_expand_copysign (rtx []);

> > -extern void ix86_split_copysign_const (rtx []);

> > -extern void ix86_split_copysign_var (rtx []);

> >  extern void ix86_expand_xorsign (rtx []);

> >  extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);

> >  extern bool ix86_match_ccmode (rtx, machine_mode);

> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

> > index 6b4ceb2bce3..ba0058dad81 100644

> > --- a/gcc/config/i386/i386.md

> > +++ b/gcc/config/i386/i386.md

> > @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3"

> >     || (TARGET_SSE && (<MODE>mode == TFmode))"

> >    "ix86_expand_copysign (operands); DONE;")

> >

> > -(define_insn_and_split "@copysign<mode>3_const"

> > -  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")

> > -       (unspec:SSEMODEF

> > -         [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")

> > -          (match_operand:SSEMODEF 2 "register_operand" "0")

> > -          (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]

> > -         UNSPEC_COPYSIGN))]

> > -  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)

> > -   || (TARGET_SSE && (<MODE>mode == TFmode))"

> > -  "#"

> > -  "&& reload_completed"

> > -  [(const_int 0)]

> > -  "ix86_split_copysign_const (operands); DONE;")

> > -

> > -(define_insn "@copysign<mode>3_var"

> > -  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")

> > -       (unspec:SSEMODEF

> > -         [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")

> > -          (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")

> > -          (match_operand:<ssevecmodef> 4

> > -            "nonimmediate_operand" "X,Yvm,Yvm,0,0")

> > -          (match_operand:<ssevecmodef> 5

> > -            "nonimmediate_operand" "0,Yvm,1,Yvm,1")]

> > -         UNSPEC_COPYSIGN))

> > -   (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]

> > -  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)

> > -   || (TARGET_SSE && (<MODE>mode == TFmode))"

> > -  "#")

> > -

> > -(define_split

> > -  [(set (match_operand:SSEMODEF 0 "register_operand")

> > -       (unspec:SSEMODEF

> > -         [(match_operand:SSEMODEF 2 "register_operand")

> > -          (match_operand:SSEMODEF 3 "register_operand")

> > -          (match_operand:<ssevecmodef> 4)

> > -          (match_operand:<ssevecmodef> 5)]

> > -         UNSPEC_COPYSIGN))

> > -   (clobber (match_scratch:<ssevecmodef> 1))]

> > -  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)

> > -    || (TARGET_SSE && (<MODE>mode == TFmode)))

> > -   && reload_completed"

> > -  [(const_int 0)]

> > -  "ix86_split_copysign_var (operands); DONE;")

> > -

> >  (define_expand "xorsign<mode>3"

> >    [(match_operand:MODEF 0 "register_operand")

> >     (match_operand:MODEF 1 "register_operand")

> > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c

> > index cb542d09058..0107df7741a 100644

> > --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c

> > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c

> > @@ -64,8 +64,8 @@ f6 (double x)

> >  }

> >

> >  /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */

> > -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */

> > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */

> >  /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */

> >  /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */

> > -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */

> > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */

> >  /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */

> > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c

> > index b375c5fad80..b27335b9d99 100644

> > --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c

> > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c

> > @@ -64,8 +64,8 @@ f6 (double x)

> >  }

> >

> >  /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */

> > -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */

> > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */

> >  /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */

> >  /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */

> > -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */

> > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */

> >  /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */

> > --

> > 2.27.0

> >

>

>

> --

> BR,

> Hongtao




-- 
BR,
Hongtao

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index badbacc19d8..a0262a8f47d 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -2115,13 +2115,9 @@  void
 ix86_expand_copysign (rtx operands[])
 {
   machine_mode mode, vmode;
-  rtx dest, op0, op1, mask;
+  rtx dest, op0, op1, mask, op2, op3;
 
-  dest = operands[0];
-  op0 = operands[1];
-  op1 = operands[2];
-
-  mode = GET_MODE (dest);
+  mode = GET_MODE (operands[0]);
 
   if (mode == SFmode)
     vmode = V4SFmode;
@@ -2132,136 +2128,40 @@  ix86_expand_copysign (rtx operands[])
   else
     gcc_unreachable ();
 
-  mask = ix86_build_signbit_mask (vmode, 0, 0);
-
-  if (CONST_DOUBLE_P (op0))
+  if (rtx_equal_p (operands[1], operands[2]))
     {
-      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
-	op0 = simplify_unary_operation (ABS, mode, op0, mode);
-
-      if (mode == SFmode || mode == DFmode)
-	{
-	  if (op0 == CONST0_RTX (mode))
-	    op0 = CONST0_RTX (vmode);
-	  else
-	    {
-	      rtx v = ix86_build_const_vector (vmode, false, op0);
-
-	      op0 = force_reg (vmode, v);
-	    }
-	}
-      else if (op0 != CONST0_RTX (mode))
-	op0 = force_reg (mode, op0);
-
-      emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));
-    }
-  else
-    {
-      rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);
-
-      emit_insn (gen_copysign3_var
-		 (mode, dest, NULL_RTX, op0, op1, nmask, mask));
-    }
-}
-
-/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
-   be a constant, and so has already been expanded into a vector constant.  */
-
-void
-ix86_split_copysign_const (rtx operands[])
-{
-  machine_mode mode, vmode;
-  rtx dest, op0, mask, x;
-
-  dest = operands[0];
-  op0 = operands[1];
-  mask = operands[3];
-
-  mode = GET_MODE (dest);
-  vmode = GET_MODE (mask);
-
-  dest = lowpart_subreg (vmode, dest, mode);
-  x = gen_rtx_AND (vmode, dest, mask);
-  emit_insn (gen_rtx_SET (dest, x));
-
-  if (op0 != CONST0_RTX (vmode))
-    {
-      x = gen_rtx_IOR (vmode, dest, op0);
-      emit_insn (gen_rtx_SET (dest, x));
-    }
-}
-
-/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
-   so we have to do two masks.  */
-
-void
-ix86_split_copysign_var (rtx operands[])
-{
-  machine_mode mode, vmode;
-  rtx dest, scratch, op0, op1, mask, nmask, x;
-
-  dest = operands[0];
-  scratch = operands[1];
-  op0 = operands[2];
-  op1 = operands[3];
-  nmask = operands[4];
-  mask = operands[5];
-
-  mode = GET_MODE (dest);
-  vmode = GET_MODE (mask);
-
-  if (rtx_equal_p (op0, op1))
-    {
-      /* Shouldn't happen often (it's useless, obviously), but when it does
-	 we'd generate incorrect code if we continue below.  */
-      emit_move_insn (dest, op0);
+      emit_move_insn (operands[0], operands[1]);
       return;
     }
 
-  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
-    {
-      gcc_assert (REGNO (op1) == REGNO (scratch));
-
-      x = gen_rtx_AND (vmode, scratch, mask);
-      emit_insn (gen_rtx_SET (scratch, x));
+  dest = lowpart_subreg (vmode, operands[0], mode);
+  op1 = lowpart_subreg (vmode, operands[2], mode);
+  mask = ix86_build_signbit_mask (vmode, 0, 0);
 
-      dest = mask;
-      op0 = lowpart_subreg (vmode, op0, mode);
-      x = gen_rtx_NOT (vmode, dest);
-      x = gen_rtx_AND (vmode, x, op0);
-      emit_insn (gen_rtx_SET (dest, x));
-    }
-  else
+  if (CONST_DOUBLE_P (operands[1]))
     {
-      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
-	{
-	  x = gen_rtx_AND (vmode, scratch, mask);
-	}
-      else						/* alternative 2,4 */
+      op0 = simplify_unary_operation (ABS, mode, operands[1], mode);
+      /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a.  */
+      if (op0 == CONST0_RTX (mode))
 	{
-          gcc_assert (REGNO (mask) == REGNO (scratch));
-          op1 = lowpart_subreg (vmode, op1, mode);
-	  x = gen_rtx_AND (vmode, scratch, op1);
+	  emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
+	  return;
 	}
-      emit_insn (gen_rtx_SET (scratch, x));
 
-      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
-	{
-	  dest = lowpart_subreg (vmode, op0, mode);
-	  x = gen_rtx_AND (vmode, dest, nmask);
-	}
-      else						/* alternative 3,4 */
-	{
-          gcc_assert (REGNO (nmask) == REGNO (dest));
-	  dest = nmask;
-	  op0 = lowpart_subreg (vmode, op0, mode);
-	  x = gen_rtx_AND (vmode, dest, op0);
-	}
-      emit_insn (gen_rtx_SET (dest, x));
+      if (GET_MODE_SIZE (mode) < 16)
+	op0 = ix86_build_const_vector (vmode, false, op0);
+      op0 = force_reg (vmode, op0);
     }
-
-  x = gen_rtx_IOR (vmode, dest, scratch);
-  emit_insn (gen_rtx_SET (dest, x));
+  else
+    op0 = lowpart_subreg (vmode, operands[1], mode);
+
+  op2 = gen_reg_rtx (vmode);
+  op3 = gen_reg_rtx (vmode);
+  emit_move_insn (op2, gen_rtx_AND (vmode,
+				    gen_rtx_NOT (vmode, mask),
+				    op0));
+  emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
+  emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
 }
 
 /* Expand an xorsign operation.  */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 72644e33a92..dcae34b915e 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -135,8 +135,6 @@  extern void ix86_expand_fp_absneg_operator (enum rtx_code, machine_mode,
 extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,
 					   rtx[]);
 extern void ix86_expand_copysign (rtx []);
-extern void ix86_split_copysign_const (rtx []);
-extern void ix86_split_copysign_var (rtx []);
 extern void ix86_expand_xorsign (rtx []);
 extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
 extern bool ix86_match_ccmode (rtx, machine_mode);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6b4ceb2bce3..ba0058dad81 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10861,50 +10861,6 @@  (define_expand "copysign<mode>3"
    || (TARGET_SSE && (<MODE>mode == TFmode))"
   "ix86_expand_copysign (operands); DONE;")
 
-(define_insn_and_split "@copysign<mode>3_const"
-  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
-	(unspec:SSEMODEF
-	  [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
-	   (match_operand:SSEMODEF 2 "register_operand" "0")
-	   (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
-	  UNSPEC_COPYSIGN))]
-  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-   || (TARGET_SSE && (<MODE>mode == TFmode))"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-  "ix86_split_copysign_const (operands); DONE;")
-
-(define_insn "@copysign<mode>3_var"
-  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
-	(unspec:SSEMODEF
-	  [(match_operand:SSEMODEF 2 "register_operand"	"Yv,0,0,Yv,Yv")
-	   (match_operand:SSEMODEF 3 "register_operand"	"1,1,Yv,1,Yv")
-	   (match_operand:<ssevecmodef> 4
-	     "nonimmediate_operand" "X,Yvm,Yvm,0,0")
-	   (match_operand:<ssevecmodef> 5
-	     "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
-	  UNSPEC_COPYSIGN))
-   (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
-  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-   || (TARGET_SSE && (<MODE>mode == TFmode))"
-  "#")
-
-(define_split
-  [(set (match_operand:SSEMODEF 0 "register_operand")
-	(unspec:SSEMODEF
-	  [(match_operand:SSEMODEF 2 "register_operand")
-	   (match_operand:SSEMODEF 3 "register_operand")
-	   (match_operand:<ssevecmodef> 4)
-	   (match_operand:<ssevecmodef> 5)]
-	  UNSPEC_COPYSIGN))
-   (clobber (match_scratch:<ssevecmodef> 1))]
-  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-    || (TARGET_SSE && (<MODE>mode == TFmode)))
-   && reload_completed"
-  [(const_int 0)]
-  "ix86_split_copysign_var (operands); DONE;")
-
 (define_expand "xorsign<mode>3"
   [(match_operand:MODEF 0 "register_operand")
    (match_operand:MODEF 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
index cb542d09058..0107df7741a 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
@@ -64,8 +64,8 @@  f6 (double x)
 }
 
 /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */
-/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */
+/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
 /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */
 /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */
-/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */
+/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
 /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
index b375c5fad80..b27335b9d99 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
@@ -64,8 +64,8 @@  f6 (double x)
 }
 
 /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */
-/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */
+/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
 /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */
 /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */
-/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */
+/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
 /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */