Support logic shift left/right for avx512 mask type.

Message ID 20210720123314.79588-1-hongtao.liu@intel.com
State New
Headers show
Series
  • Support logic shift left/right for avx512 mask type.
Related show

Commit Message

Eugene Rozenfeld via Gcc-patches July 20, 2021, 12:33 p.m.
Hi:
  As mention in https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575420.html

----cut start-----
> note for the lowpart we can just view-convert away the excess bits,

> fully re-using the mask.  We generate surprisingly "good" code:

>

>         kmovb   %k1, %edi

>         shrb    $4, %dil

>         kmovb   %edi, %k2

>

> besides the lack of using kshiftrb.  I guess we're just lacking

> a mask register alternative for

Yes, we can do it similar as kor/kand/kxor.
---cut end--------

  Bootstrap and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

	* config/i386/constraints.md (Wb): New constraint.
	(Ww): Ditto.
	* config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask
	shift.
	(*ashlqi3_1): Ditto.
	(*<insn><mode>3_1): Ditto.
	(*<insn><mode>3_1): Ditto.
	* config/i386/sse.md (k<code><mode>): New define_split after
	it to convert generic shift pattern to mask shift ones.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/mask-shift.c: New test.
---
 gcc/config/i386/constraints.md             | 10 +++
 gcc/config/i386/i386.md                    | 94 +++++++++++++++-------
 gcc/config/i386/sse.md                     | 14 ++++
 gcc/testsuite/gcc.target/i386/mask-shift.c | 83 +++++++++++++++++++
 4 files changed, 173 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/mask-shift.c

-- 
2.18.1

Comments

Eugene Rozenfeld via Gcc-patches July 20, 2021, 1:40 p.m. | #1
On Tue, Jul 20, 2021 at 2:33 PM liuhongt <hongtao.liu@intel.com> wrote:
>

> Hi:

>   As mention in https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575420.html

>

> ----cut start-----

> > note for the lowpart we can just view-convert away the excess bits,

> > fully re-using the mask.  We generate surprisingly "good" code:

> >

> >         kmovb   %k1, %edi

> >         shrb    $4, %dil

> >         kmovb   %edi, %k2

> >

> > besides the lack of using kshiftrb.  I guess we're just lacking

> > a mask register alternative for

> Yes, we can do it similar as kor/kand/kxor.

> ---cut end--------

>

>   Bootstrap and regtested on x86_64-linux-gnu{-m32,}.

>   Ok for trunk?

>

> gcc/ChangeLog:

>

>         * config/i386/constraints.md (Wb): New constraint.

>         (Ww): Ditto.

>         * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask

>         shift.

>         (*ashlqi3_1): Ditto.

>         (*<insn><mode>3_1): Ditto.

>         (*<insn><mode>3_1): Ditto.

>         * config/i386/sse.md (k<code><mode>): New define_split after

>         it to convert generic shift pattern to mask shift ones.

>

> gcc/testsuite/ChangeLog:

>

>         * gcc.target/i386/mask-shift.c: New test.

> ---

>  gcc/config/i386/constraints.md             | 10 +++

>  gcc/config/i386/i386.md                    | 94 +++++++++++++++-------

>  gcc/config/i386/sse.md                     | 14 ++++

>  gcc/testsuite/gcc.target/i386/mask-shift.c | 83 +++++++++++++++++++

>  4 files changed, 173 insertions(+), 28 deletions(-)

>  create mode 100644 gcc/testsuite/gcc.target/i386/mask-shift.c

>

> diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md

> index 485e3f5b2cf..4aa28a5621c 100644

> --- a/gcc/config/i386/constraints.md

> +++ b/gcc/config/i386/constraints.md

> @@ -222,6 +222,16 @@ (define_constraint "BC"

>             (match_operand 0 "vector_all_ones_operand"))))

>

>  ;; Integer constant constraints.

> +(define_constraint "Wb"

> +  "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."

> +  (and (match_code "const_int")

> +       (match_test "IN_RANGE (ival, 0, 7)")))

> +

> +(define_constraint "Ww"

> +  "Integer constant in the range 0 @dots{} 15, for 16-bit shifts."

> +  (and (match_code "const_int")

> +       (match_test "IN_RANGE (ival, 0, 15)")))

> +

>  (define_constraint "I"

>    "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."

>    (and (match_code "const_int")

> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

> index 8b809c49fe0..c5f9bd4d4d8 100644

> --- a/gcc/config/i386/i386.md

> +++ b/gcc/config/i386/i386.md

> @@ -1136,6 +1136,7 @@ (define_mode_attr di [(SI "nF") (DI "Wd")])

>

>  ;; Immediate operand constraint for shifts.

>  (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])

> +(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])

>

>  ;; Print register name in the specified mode.

>  (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])

> @@ -11088,9 +11089,9 @@ (define_insn "*bmi2_ashl<mode>3_1"

>     (set_attr "mode" "<MODE>")])

>

>  (define_insn "*ashl<mode>3_1"

> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")

> -       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")

> -                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))

> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")

> +       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")

> +                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))

>     (clobber (reg:CC FLAGS_REG))]

>    "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"

>  {

> @@ -11098,6 +11099,7 @@ (define_insn "*ashl<mode>3_1"

>      {

>      case TYPE_LEA:

>      case TYPE_ISHIFTX:

> +    case TYPE_MSKLOG:

>        return "#";

>

>      case TYPE_ALU:

> @@ -11113,7 +11115,11 @@ (define_insn "*ashl<mode>3_1"

>         return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";

>      }

>  }

> -  [(set_attr "isa" "*,*,bmi2")

> +  [(set_attr "isa" "*,*,bmi2,avx512bw")

>     (set (attr "type")

>       (cond [(eq_attr "alternative" "1")

>               (const_string "lea")

> @@ -11123,6 +11129,8 @@ (define_insn "*ashl<mode>3_1"

>                       (match_operand 0 "register_operand"))

>                  (match_operand 2 "const1_operand"))

>               (const_string "alu")

> +           (eq_attr "alternative" "3")

> +             (const_string "msklog")

>            ]

>            (const_string "ishift")))

>     (set (attr "length_immediate")

> @@ -11218,15 +11226,16 @@ (define_split

>    "operands[2] = gen_lowpart (SImode, operands[2]);")

>

>  (define_insn "*ashlhi3_1"

> -  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")

> -       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")

> -                  (match_operand:QI 2 "nonmemory_operand" "cI,M")))

> +  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")

> +       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")

> +                  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))

>     (clobber (reg:CC FLAGS_REG))]

>    "ix86_binary_operator_ok (ASHIFT, HImode, operands)"

>  {

>    switch (get_attr_type (insn))

>      {

>      case TYPE_LEA:

> +    case TYPE_MSKLOG:

>        return "#";

>

>      case TYPE_ALU:

> @@ -11241,9 +11246,12 @@ (define_insn "*ashlhi3_1"

>         return "sal{w}\t{%2, %0|%0, %2}";

>      }

>  }

> -  [(set (attr "type")

> +  [(set_attr "isa" "*,*,avx512f")

> +   (set (attr "type")

>       (cond [(eq_attr "alternative" "1")

>               (const_string "lea")

> +           (eq_attr "alternative" "2")

> +             (const_string "msklog")

>              (and (and (match_test "TARGET_DOUBLE_WITH_ADD")

>                       (match_operand 0 "register_operand"))

>                  (match_operand 2 "const1_operand"))

> @@ -11259,18 +11270,19 @@ (define_insn "*ashlhi3_1"

>                            (match_test "optimize_function_for_size_p (cfun)")))))

>         (const_string "0")

>         (const_string "*")))

> -   (set_attr "mode" "HI,SI")])

> +   (set_attr "mode" "HI,SI,HI")])

>

>  (define_insn "*ashlqi3_1"

> -  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")

> -       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")

> -                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))

> +  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")

> +       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")

> +                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))

>     (clobber (reg:CC FLAGS_REG))]

>    "ix86_binary_operator_ok (ASHIFT, QImode, operands)"

>  {

>    switch (get_attr_type (insn))

>      {

>      case TYPE_LEA:

> +    case TYPE_MSKLOG:

>        return "#";

>

>      case TYPE_ALU:

> @@ -11298,9 +11307,12 @@ (define_insn "*ashlqi3_1"

>         }

>      }

>  }

> -  [(set (attr "type")

> +  [(set_attr "isa" "*,*,*,avx512dq")

> +   (set (attr "type")

>       (cond [(eq_attr "alternative" "2")

>               (const_string "lea")

> +           (eq_attr "alternative" "3")

> +             (const_string "msklog")

>              (and (and (match_test "TARGET_DOUBLE_WITH_ADD")

>                       (match_operand 0 "register_operand"))

>                  (match_operand 2 "const1_operand"))

> @@ -11316,7 +11334,7 @@ (define_insn "*ashlqi3_1"

>                            (match_test "optimize_function_for_size_p (cfun)")))))

>         (const_string "0")

>         (const_string "*")))

> -   (set_attr "mode" "QI,SI,SI")

> +   (set_attr "mode" "QI,SI,SI,QI")

>     ;; Potential partial reg stall on alternative 1.

>     (set (attr "preferred_for_speed")

>       (cond [(eq_attr "alternative" "1")

> @@ -11819,16 +11837,17 @@ (define_insn "*bmi2_<insn><mode>3_1"

>     (set_attr "mode" "<MODE>")])

>

>  (define_insn "*<insn><mode>3_1"

> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")

> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")

>         (any_shiftrt:SWI48

> -         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")

> -         (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))

> +         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")

> +         (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))

>     (clobber (reg:CC FLAGS_REG))]

>    "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"

>  {

>    switch (get_attr_type (insn))

>      {

>      case TYPE_ISHIFTX:

> +    case TYPE_MSKLOG:

>        return "#";

>

>      default:

> @@ -11839,11 +11858,16 @@ (define_insn "*<insn><mode>3_1"

>         return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";

>      }

>  }

> -  [(set_attr "isa" "*,bmi2")

> -   (set_attr "type" "ishift,ishiftx")

> +  [(set_attr "isa" "*,bmi2,avx512bw")

> +   (set_attr "type" "ishift,ishiftx,msklog")

> +   (set (attr "enabled")

> +       (if_then_else (eq_attr "alternative" "2")

> +         (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW")


Please rather split the pattern to ASHIFTRT and LSHIFTRT. The
macroization has no point if we need to use enabled attribute in this
way.

> +         (const_string "*")))

>     (set (attr "length_immediate")

>       (if_then_else

> -       (and (match_operand 2 "const1_operand")

> +       (and (and (match_operand 2 "const1_operand")

> +                (eq_attr "alternative" "0"))

>             (ior (match_test "TARGET_SHIFT1")

>                  (match_test "optimize_function_for_size_p (cfun)")))

>         (const_string "0")

> @@ -11916,27 +11940,41 @@ (define_split

>    "operands[2] = gen_lowpart (SImode, operands[2]);")

>

>  (define_insn "*<insn><mode>3_1"

> -  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")

> +  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")

>         (any_shiftrt:SWI12

> -         (match_operand:SWI12 1 "nonimmediate_operand" "0")

> -         (match_operand:QI 2 "nonmemory_operand" "c<S>")))

> +         (match_operand:SWI12 1 "nonimmediate_operand" "0, k")

> +         (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))

>     (clobber (reg:CC FLAGS_REG))]

>    "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"

>  {

> -  if (operands[2] == const1_rtx

> -      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))

> -    return "<shift>{<imodesuffix>}\t%0";

> -  else

> -    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";

> +  switch (get_attr_type (insn))

> +    {

> +    case TYPE_ISHIFT:

> +      if (operands[2] == const1_rtx

> +         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))

> +       return "<shift>{<imodesuffix>}\t%0";

> +      else

> +       return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";

> +    case TYPE_MSKLOG:

> +      return "#";

> +    default:

> +      gcc_unreachable ();

> +    }

>  }

> -  [(set_attr "type" "ishift")

> +  [(set_attr "type" "ishift,msklog")

>     (set (attr "length_immediate")

>       (if_then_else

> -       (and (match_operand 2 "const1_operand")

> +       (and (and (match_operand 2 "const1_operand")

> +                (eq_attr "alternative" "0"))

>             (ior (match_test "TARGET_SHIFT1")

>                  (match_test "optimize_function_for_size_p (cfun)")))

>         (const_string "0")

>         (const_string "*")))

> +   (set (attr "enabled")

> +       (if_then_else (eq_attr "alternative" "1")

> +         (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512F

> +                      && (<MODE>mode != QImode || TARGET_AVX512DQ)")


Also here, please split out LSHIFTRT and perhaps use conditional
constraint to avoid enabled attribute.

Uros.

> +         (const_string "*")))

>     (set_attr "mode" "<MODE>")])

>

>  (define_insn "*<insn><mode>3_1_slp"

> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

> index ab29999023d..f8759e4d758 100644

> --- a/gcc/config/i386/sse.md

> +++ b/gcc/config/i386/sse.md

> @@ -1755,6 +1755,20 @@ (define_insn "k<code><mode>"

>     (set_attr "prefix" "vex")

>     (set_attr "mode" "<MODE>")])

>

> +(define_split

> +  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")

> +       (any_lshift:SWI1248_AVX512BW

> +         (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")

> +         (match_operand 2 "const_int_operand")))

> +   (clobber (reg:CC FLAGS_REG))]

> +  "TARGET_AVX512F && reload_completed"

> +  [(parallel

> +     [(set (match_dup 0)

> +          (any_lshift:SWI1248_AVX512BW

> +            (match_dup 1)

> +            (match_dup 2)))

> +      (unspec [(const_int 0)] UNSPEC_MASKOP)])])

> +

>  (define_insn "ktest<mode>"

>    [(set (reg:CC FLAGS_REG)

>         (unspec:CC

> diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c

> new file mode 100644

> index 00000000000..4cb6ef37821

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/i386/mask-shift.c

> @@ -0,0 +1,83 @@

> +/* { dg-do compile } */

> +/* { dg-options "-mavx512bw -mavx512dq -O2" } */

> +

> +#include<immintrin.h>

> +void

> +fooq (__m512i a, __m512i b, void* p)

> +{

> +  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);

> +  m1 >>= 4;

> +  _mm512_mask_storeu_epi64 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } }  */

> +

> +void

> +food (__m512i a, __m512i b, void* p)

> +{

> +  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);

> +  m1 >>= 8;

> +  _mm512_mask_storeu_epi32 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } }  */

> +

> +void

> +foow (__m512i a, __m512i b, void* p)

> +{

> +  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);

> +  m1 >>= 16;

> +  _mm512_mask_storeu_epi16 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } }  */

> +

> +void

> +foob (__m512i a, __m512i b, void* p)

> +{

> +  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);

> +  m1 >>= 32;

> +  _mm512_mask_storeu_epi8 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } }  */

> +

> +void

> +fooq1 (__m512i a, __m512i b, void* p)

> +{

> +  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);

> +  m1 <<= 4;

> +  _mm512_mask_storeu_epi64 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } }  */

> +

> +void

> +food1 (__m512i a, __m512i b, void* p)

> +{

> +  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);

> +  m1 <<= 8;

> +  _mm512_mask_storeu_epi32 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } }  */

> +

> +void

> +foow1 (__m512i a, __m512i b, void* p)

> +{

> +  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);

> +  m1 <<= 16;

> +  _mm512_mask_storeu_epi16 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } }  */

> +

> +void

> +foob1 (__m512i a, __m512i b, void* p)

> +{

> +  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);

> +  m1 <<= 32;

> +  _mm512_mask_storeu_epi8 (p, m1, a);

> +}

> +

> +/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } }  */

> --

> 2.18.1

>
Eugene Rozenfeld via Gcc-patches July 21, 2021, 3:11 a.m. | #2
On Tue, Jul 20, 2021 at 9:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>

> On Tue, Jul 20, 2021 at 2:33 PM liuhongt <hongtao.liu@intel.com> wrote:

> >

> > Hi:

> >   As mention in https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575420.html

> >

> > ----cut start-----

> > > note for the lowpart we can just view-convert away the excess bits,

> > > fully re-using the mask.  We generate surprisingly "good" code:

> > >

> > >         kmovb   %k1, %edi

> > >         shrb    $4, %dil

> > >         kmovb   %edi, %k2

> > >

> > > besides the lack of using kshiftrb.  I guess we're just lacking

> > > a mask register alternative for

> > Yes, we can do it similar as kor/kand/kxor.

> > ---cut end--------

> >

> >   Bootstrap and regtested on x86_64-linux-gnu{-m32,}.

> >   Ok for trunk?

> >

> > gcc/ChangeLog:

> >

> >         * config/i386/constraints.md (Wb): New constraint.

> >         (Ww): Ditto.

> >         * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask

> >         shift.

> >         (*ashlqi3_1): Ditto.

> >         (*<insn><mode>3_1): Ditto.

> >         (*<insn><mode>3_1): Ditto.

> >         * config/i386/sse.md (k<code><mode>): New define_split after

> >         it to convert generic shift pattern to mask shift ones.

> >

> > gcc/testsuite/ChangeLog:

> >

> >         * gcc.target/i386/mask-shift.c: New test.

> > ---

> >  gcc/config/i386/constraints.md             | 10 +++

> >  gcc/config/i386/i386.md                    | 94 +++++++++++++++-------

> >  gcc/config/i386/sse.md                     | 14 ++++

> >  gcc/testsuite/gcc.target/i386/mask-shift.c | 83 +++++++++++++++++++

> >  4 files changed, 173 insertions(+), 28 deletions(-)

> >  create mode 100644 gcc/testsuite/gcc.target/i386/mask-shift.c

> >

> > diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md

> > index 485e3f5b2cf..4aa28a5621c 100644

> > --- a/gcc/config/i386/constraints.md

> > +++ b/gcc/config/i386/constraints.md

> > @@ -222,6 +222,16 @@ (define_constraint "BC"

> >             (match_operand 0 "vector_all_ones_operand"))))

> >

> >  ;; Integer constant constraints.

> > +(define_constraint "Wb"

> > +  "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."

> > +  (and (match_code "const_int")

> > +       (match_test "IN_RANGE (ival, 0, 7)")))

> > +

> > +(define_constraint "Ww"

> > +  "Integer constant in the range 0 @dots{} 15, for 16-bit shifts."

> > +  (and (match_code "const_int")

> > +       (match_test "IN_RANGE (ival, 0, 15)")))

> > +

> >  (define_constraint "I"

> >    "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."

> >    (and (match_code "const_int")

> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

> > index 8b809c49fe0..c5f9bd4d4d8 100644

> > --- a/gcc/config/i386/i386.md

> > +++ b/gcc/config/i386/i386.md

> > @@ -1136,6 +1136,7 @@ (define_mode_attr di [(SI "nF") (DI "Wd")])

> >

> >  ;; Immediate operand constraint for shifts.

> >  (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])

> > +(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])

> >

> >  ;; Print register name in the specified mode.

> >  (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])

> > @@ -11088,9 +11089,9 @@ (define_insn "*bmi2_ashl<mode>3_1"

> >     (set_attr "mode" "<MODE>")])

> >

> >  (define_insn "*ashl<mode>3_1"

> > -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")

> > -       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")

> > -                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))

> > +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")

> > +       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")

> > +                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))

> >     (clobber (reg:CC FLAGS_REG))]

> >    "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"

> >  {

> > @@ -11098,6 +11099,7 @@ (define_insn "*ashl<mode>3_1"

> >      {

> >      case TYPE_LEA:

> >      case TYPE_ISHIFTX:

> > +    case TYPE_MSKLOG:

> >        return "#";

> >

> >      case TYPE_ALU:

> > @@ -11113,7 +11115,11 @@ (define_insn "*ashl<mode>3_1"

> >         return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";

> >      }

> >  }

> > -  [(set_attr "isa" "*,*,bmi2")

> > +  [(set_attr "isa" "*,*,bmi2,avx512bw")

> >     (set (attr "type")

> >       (cond [(eq_attr "alternative" "1")

> >               (const_string "lea")

> > @@ -11123,6 +11129,8 @@ (define_insn "*ashl<mode>3_1"

> >                       (match_operand 0 "register_operand"))

> >                  (match_operand 2 "const1_operand"))

> >               (const_string "alu")

> > +           (eq_attr "alternative" "3")

> > +             (const_string "msklog")

> >            ]

> >            (const_string "ishift")))

> >     (set (attr "length_immediate")

> > @@ -11218,15 +11226,16 @@ (define_split

> >    "operands[2] = gen_lowpart (SImode, operands[2]);")

> >

> >  (define_insn "*ashlhi3_1"

> > -  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")

> > -       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")

> > -                  (match_operand:QI 2 "nonmemory_operand" "cI,M")))

> > +  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")

> > +       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")

> > +                  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))

> >     (clobber (reg:CC FLAGS_REG))]

> >    "ix86_binary_operator_ok (ASHIFT, HImode, operands)"

> >  {

> >    switch (get_attr_type (insn))

> >      {

> >      case TYPE_LEA:

> > +    case TYPE_MSKLOG:

> >        return "#";

> >

> >      case TYPE_ALU:

> > @@ -11241,9 +11246,12 @@ (define_insn "*ashlhi3_1"

> >         return "sal{w}\t{%2, %0|%0, %2}";

> >      }

> >  }

> > -  [(set (attr "type")

> > +  [(set_attr "isa" "*,*,avx512f")

> > +   (set (attr "type")

> >       (cond [(eq_attr "alternative" "1")

> >               (const_string "lea")

> > +           (eq_attr "alternative" "2")

> > +             (const_string "msklog")

> >              (and (and (match_test "TARGET_DOUBLE_WITH_ADD")

> >                       (match_operand 0 "register_operand"))

> >                  (match_operand 2 "const1_operand"))

> > @@ -11259,18 +11270,19 @@ (define_insn "*ashlhi3_1"

> >                            (match_test "optimize_function_for_size_p (cfun)")))))

> >         (const_string "0")

> >         (const_string "*")))

> > -   (set_attr "mode" "HI,SI")])

> > +   (set_attr "mode" "HI,SI,HI")])

> >

> >  (define_insn "*ashlqi3_1"

> > -  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")

> > -       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")

> > -                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))

> > +  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")

> > +       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")

> > +                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))

> >     (clobber (reg:CC FLAGS_REG))]

> >    "ix86_binary_operator_ok (ASHIFT, QImode, operands)"

> >  {

> >    switch (get_attr_type (insn))

> >      {

> >      case TYPE_LEA:

> > +    case TYPE_MSKLOG:

> >        return "#";

> >

> >      case TYPE_ALU:

> > @@ -11298,9 +11307,12 @@ (define_insn "*ashlqi3_1"

> >         }

> >      }

> >  }

> > -  [(set (attr "type")

> > +  [(set_attr "isa" "*,*,*,avx512dq")

> > +   (set (attr "type")

> >       (cond [(eq_attr "alternative" "2")

> >               (const_string "lea")

> > +           (eq_attr "alternative" "3")

> > +             (const_string "msklog")

> >              (and (and (match_test "TARGET_DOUBLE_WITH_ADD")

> >                       (match_operand 0 "register_operand"))

> >                  (match_operand 2 "const1_operand"))

> > @@ -11316,7 +11334,7 @@ (define_insn "*ashlqi3_1"

> >                            (match_test "optimize_function_for_size_p (cfun)")))))

> >         (const_string "0")

> >         (const_string "*")))

> > -   (set_attr "mode" "QI,SI,SI")

> > +   (set_attr "mode" "QI,SI,SI,QI")

> >     ;; Potential partial reg stall on alternative 1.

> >     (set (attr "preferred_for_speed")

> >       (cond [(eq_attr "alternative" "1")

> > @@ -11819,16 +11837,17 @@ (define_insn "*bmi2_<insn><mode>3_1"

> >     (set_attr "mode" "<MODE>")])

> >

> >  (define_insn "*<insn><mode>3_1"

> > -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")

> > +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")

> >         (any_shiftrt:SWI48

> > -         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")

> > -         (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))

> > +         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")

> > +         (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))

> >     (clobber (reg:CC FLAGS_REG))]

> >    "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"

> >  {

> >    switch (get_attr_type (insn))

> >      {

> >      case TYPE_ISHIFTX:

> > +    case TYPE_MSKLOG:

> >        return "#";

> >

> >      default:

> > @@ -11839,11 +11858,16 @@ (define_insn "*<insn><mode>3_1"

> >         return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";

> >      }

> >  }

> > -  [(set_attr "isa" "*,bmi2")

> > -   (set_attr "type" "ishift,ishiftx")

> > +  [(set_attr "isa" "*,bmi2,avx512bw")

> > +   (set_attr "type" "ishift,ishiftx,msklog")

> > +   (set (attr "enabled")

> > +       (if_then_else (eq_attr "alternative" "2")

> > +         (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW")

>

> Please rather split the pattern to ASHIFTRT and LSHIFTRT. The

> macroization has no point if we need to use enabled attribute in this

> way.

Changed.
>

> > +         (const_string "*")))

> >     (set (attr "length_immediate")

> >       (if_then_else

> > -       (and (match_operand 2 "const1_operand")

> > +       (and (and (match_operand 2 "const1_operand")

> > +                (eq_attr "alternative" "0"))

> >             (ior (match_test "TARGET_SHIFT1")

> >                  (match_test "optimize_function_for_size_p (cfun)")))

> >         (const_string "0")

> > @@ -11916,27 +11940,41 @@ (define_split

> >    "operands[2] = gen_lowpart (SImode, operands[2]);")

> >

> >  (define_insn "*<insn><mode>3_1"

> > -  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")

> > +  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")

> >         (any_shiftrt:SWI12

> > -         (match_operand:SWI12 1 "nonimmediate_operand" "0")

> > -         (match_operand:QI 2 "nonmemory_operand" "c<S>")))

> > +         (match_operand:SWI12 1 "nonimmediate_operand" "0, k")

> > +         (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))

> >     (clobber (reg:CC FLAGS_REG))]

> >    "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"

> >  {

> > -  if (operands[2] == const1_rtx

> > -      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))

> > -    return "<shift>{<imodesuffix>}\t%0";

> > -  else

> > -    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";

> > +  switch (get_attr_type (insn))

> > +    {

> > +    case TYPE_ISHIFT:

> > +      if (operands[2] == const1_rtx

> > +         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))

> > +       return "<shift>{<imodesuffix>}\t%0";

> > +      else

> > +       return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";

> > +    case TYPE_MSKLOG:

> > +      return "#";

> > +    default:

> > +      gcc_unreachable ();

> > +    }

> >  }

> > -  [(set_attr "type" "ishift")

> > +  [(set_attr "type" "ishift,msklog")

> >     (set (attr "length_immediate")

> >       (if_then_else

> > -       (and (match_operand 2 "const1_operand")

> > +       (and (and (match_operand 2 "const1_operand")

> > +                (eq_attr "alternative" "0"))

> >             (ior (match_test "TARGET_SHIFT1")

> >                  (match_test "optimize_function_for_size_p (cfun)")))

> >         (const_string "0")

> >         (const_string "*")))

> > +   (set (attr "enabled")

> > +       (if_then_else (eq_attr "alternative" "1")

> > +         (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512F

> > +                      && (<MODE>mode != QImode || TARGET_AVX512DQ)")

>

> Also here, please split out LSHIFTRT and perhaps use conditional

> constraint to avoid enabled attribute.

>

Changed.
> Uros.

>

> > +         (const_string "*")))

> >     (set_attr "mode" "<MODE>")])

> >

> >  (define_insn "*<insn><mode>3_1_slp"

> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

> > index ab29999023d..f8759e4d758 100644

> > --- a/gcc/config/i386/sse.md

> > +++ b/gcc/config/i386/sse.md

> > @@ -1755,6 +1755,20 @@ (define_insn "k<code><mode>"

> >     (set_attr "prefix" "vex")

> >     (set_attr "mode" "<MODE>")])

> >

> > +(define_split

> > +  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")

> > +       (any_lshift:SWI1248_AVX512BW

> > +         (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")

> > +         (match_operand 2 "const_int_operand")))

> > +   (clobber (reg:CC FLAGS_REG))]

> > +  "TARGET_AVX512F && reload_completed"

> > +  [(parallel

> > +     [(set (match_dup 0)

> > +          (any_lshift:SWI1248_AVX512BW

> > +            (match_dup 1)

> > +            (match_dup 2)))

> > +      (unspec [(const_int 0)] UNSPEC_MASKOP)])])

> > +

> >  (define_insn "ktest<mode>"

> >    [(set (reg:CC FLAGS_REG)

> >         (unspec:CC

> > diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c

> > new file mode 100644

> > index 00000000000..4cb6ef37821

> > --- /dev/null

> > +++ b/gcc/testsuite/gcc.target/i386/mask-shift.c

> > @@ -0,0 +1,83 @@

> > +/* { dg-do compile } */

> > +/* { dg-options "-mavx512bw -mavx512dq -O2" } */

> > +

> > +#include<immintrin.h>

> > +void

> > +fooq (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);

> > +  m1 >>= 4;

> > +  _mm512_mask_storeu_epi64 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } }  */

> > +

> > +void

> > +food (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);

> > +  m1 >>= 8;

> > +  _mm512_mask_storeu_epi32 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } }  */

> > +

> > +void

> > +foow (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);

> > +  m1 >>= 16;

> > +  _mm512_mask_storeu_epi16 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } }  */

> > +

> > +void

> > +foob (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);

> > +  m1 >>= 32;

> > +  _mm512_mask_storeu_epi8 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } }  */

> > +

> > +void

> > +fooq1 (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);

> > +  m1 <<= 4;

> > +  _mm512_mask_storeu_epi64 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } }  */

> > +

> > +void

> > +food1 (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);

> > +  m1 <<= 8;

> > +  _mm512_mask_storeu_epi32 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } }  */

> > +

> > +void

> > +foow1 (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);

> > +  m1 <<= 16;

> > +  _mm512_mask_storeu_epi16 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } }  */

> > +

> > +void

> > +foob1 (__m512i a, __m512i b, void* p)

> > +{

> > +  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);

> > +  m1 <<= 32;

> > +  _mm512_mask_storeu_epi8 (p, m1, a);

> > +}

> > +

> > +/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } }  */

> > --

> > 2.18.1

> >


Update patch.

gcc/ChangeLog:

        * config/i386/constraints.md (Wb): New constraint.
        (Ww): Ditto.
        * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask
        shift.
        (*ashlqi3_1): Ditto.
        (*<insn><mode>3_1): Split to ..
        (*ashr<mode>3_1): this, ...
        (*lshr<mode>3_1): and this, also extend this pattern to avx512
        mask registers.
        (*<insn><mode>3_1): Split to ..
        (*ashr<mode>3_1): this, ...
        (*lshr<mode>3_1): and this, also extend this pattern to avx512
        mask registers.
        * config/i386/sse.md (k<code><mode>): New define_split after
        it to convert generic shift pattern to mask shift ones.


-- 
BR,
Hongtao
From 6f731b7ec4244faf8c0c49197a78cfcbbdd42dc9 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 20 Jul 2021 18:32:35 +0800
Subject: [PATCH] Support logic shift left/right for avx512 mask type.

gcc/ChangeLog:

	* config/i386/constraints.md (Wb): New constraint.
	(Ww): Ditto.
	* config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask
	shift.
	(*ashlqi3_1): Ditto.
	(*<insn><mode>3_1): Split to ..
	(*ashr<mode>3_1): this, ...
	(*lshr<mode>3_1): and this, also extend this pattern to avx512
	mask registers.
	(*<insn><mode>3_1): Split to ..
	(*ashr<mode>3_1): this, ...
	(*lshr<mode>3_1): and this, also extend this pattern to avx512
	mask registers.
	* config/i386/sse.md (k<code><mode>): New define_split after
	it to convert generic shift pattern to mask shift ones.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/mask-shift.c: New test.
---
 gcc/config/i386/constraints.md             |  10 ++
 gcc/config/i386/i386.md                    | 133 +++++++++++++++++----
 gcc/config/i386/sse.md                     |  14 +++
 gcc/testsuite/gcc.target/i386/mask-shift.c |  83 +++++++++++++
 4 files changed, 216 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/mask-shift.c

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 485e3f5b2cf..4aa28a5621c 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -222,6 +222,16 @@ (define_constraint "BC"
 	    (match_operand 0 "vector_all_ones_operand"))))
 
 ;; Integer constant constraints.
+(define_constraint "Wb"
+  "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 0, 7)")))
+
+(define_constraint "Ww"
+  "Integer constant in the range 0 @dots{} 15, for 16-bit shifts."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 0, 15)")))
+
 (define_constraint "I"
   "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
   (and (match_code "const_int")
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8b809c49fe0..61aee28e2ea 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1136,6 +1136,7 @@ (define_mode_attr di [(SI "nF") (DI "Wd")])
 
 ;; Immediate operand constraint for shifts.
 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
 
 ;; Print register name in the specified mode.
 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
@@ -11088,9 +11089,9 @@ (define_insn "*bmi2_ashl<mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
@@ -11098,6 +11099,7 @@ (define_insn "*ashl<mode>3_1"
     {
     case TYPE_LEA:
     case TYPE_ISHIFTX:
+    case TYPE_MSKLOG:
       return "#";
 
     case TYPE_ALU:
@@ -11113,7 +11115,7 @@ (define_insn "*ashl<mode>3_1"
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,bmi2")
+  [(set_attr "isa" "*,*,bmi2,avx512bw")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
@@ -11123,6 +11125,8 @@ (define_insn "*ashl<mode>3_1"
 		      (match_operand 0 "register_operand"))
 		 (match_operand 2 "const1_operand"))
 	      (const_string "alu")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
 	   ]
 	   (const_string "ishift")))
    (set (attr "length_immediate")
@@ -11218,15 +11222,16 @@ (define_split
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*ashlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")
-	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_MSKLOG:
       return "#";
 
     case TYPE_ALU:
@@ -11241,9 +11246,12 @@ (define_insn "*ashlhi3_1"
 	return "sal{w}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,avx512f")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "msklog")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
 		      (match_operand 0 "register_operand"))
 		 (match_operand 2 "const1_operand"))
@@ -11259,18 +11267,19 @@ (define_insn "*ashlhi3_1"
 			   (match_test "optimize_function_for_size_p (cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "HI,SI")])
+   (set_attr "mode" "HI,SI,HI")])
 
 (define_insn "*ashlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
-	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_MSKLOG:
       return "#";
 
     case TYPE_ALU:
@@ -11298,9 +11307,12 @@ (define_insn "*ashlqi3_1"
 	}
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,avx512dq")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "2")
 	      (const_string "lea")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
 		      (match_operand 0 "register_operand"))
 		 (match_operand 2 "const1_operand"))
@@ -11316,7 +11328,7 @@ (define_insn "*ashlqi3_1"
 			   (match_test "optimize_function_for_size_p (cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI,SI,SI")
+   (set_attr "mode" "QI,SI,SI,QI")
    ;; Potential partial reg stall on alternative 1.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "1")
@@ -11818,13 +11830,13 @@ (define_insn "*bmi2_<insn><mode>3_1"
   [(set_attr "type" "ishiftx")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<insn><mode>3_1"
+(define_insn "*ashr<mode>3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
-	(any_shiftrt:SWI48
+	(ashiftrt:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
 	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
@@ -11834,9 +11846,9 @@ (define_insn "*<insn><mode>3_1"
     default:
       if (operands[2] == const1_rtx
 	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "<shift>{<imodesuffix>}\t%0";
+	return "sar{<imodesuffix>}\t%0";
       else
-	return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,bmi2")
@@ -11850,6 +11862,40 @@ (define_insn "*<insn><mode>3_1"
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*lshr<mode>3_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
+	(lshiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+    case TYPE_MSKLOG:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "shr{<imodesuffix>}\t%0";
+      else
+	return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,bmi2,avx512bw")
+   (set_attr "type" "ishift,ishiftx,msklog")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (and (match_operand 2 "const1_operand")
+		 (eq_attr "alternative" "0"))
+	    (ior (match_test "TARGET_SHIFT1")
+		 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
@@ -11915,19 +11961,19 @@ (define_split
 	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
-(define_insn "*<insn><mode>3_1"
+(define_insn "*ashr<mode>3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
-	(any_shiftrt:SWI12
+	(ashiftrt:SWI12
 	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
 	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
 {
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shift>{<imodesuffix>}\t%0";
+    return "sar{<imodesuffix>}\t%0";
   else
-    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -11939,6 +11985,45 @@ (define_insn "*<insn><mode>3_1"
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*lshr<mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")
+	(lshiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0, k")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFT:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "shr{<imodesuffix>}\t%0";
+      else
+	return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+    case TYPE_MSKLOG:
+      return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set (attr "isa")
+    (if_then_else (eq_attr "alternative" "1")
+      (if_then_else (match_test "<MODE>mode == QImode")
+	(const_string "avx512dq")
+	(const_string "avx512f"))
+      (const_string "*")))
+   (set_attr "type" "ishift,msklog")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (and (match_operand 2 "const1_operand")
+		 (eq_attr "alternative" "0"))
+	    (ior (match_test "TARGET_SHIFT1")
+		 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<insn><mode>3_1_slp"
   [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
 	(any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ab29999023d..f8759e4d758 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1755,6 +1755,20 @@ (define_insn "k<code><mode>"
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_lshift:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_lshift:SWI1248_AVX512BW
+	     (match_dup 1)
+	     (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_insn "ktest<mode>"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC
diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c
new file mode 100644
index 00000000000..4cb6ef37821
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mask-shift.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512dq -O2" } */
+
+#include<immintrin.h>
+void
+fooq (__m512i a, __m512i b, void* p)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  m1 >>= 4;
+  _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } }  */
+
+void
+food (__m512i a, __m512i b, void* p)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  m1 >>= 8;
+  _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } }  */
+
+void
+foow (__m512i a, __m512i b, void* p)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  m1 >>= 16;
+  _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } }  */
+
+void
+foob (__m512i a, __m512i b, void* p)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  m1 >>= 32;
+  _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } }  */
+
+void
+fooq1 (__m512i a, __m512i b, void* p)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  m1 <<= 4;
+  _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } }  */
+
+void
+food1 (__m512i a, __m512i b, void* p)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  m1 <<= 8;
+  _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } }  */
+
+void
+foow1 (__m512i a, __m512i b, void* p)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  m1 <<= 16;
+  _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } }  */
+
+void
+foob1 (__m512i a, __m512i b, void* p)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  m1 <<= 32;
+  _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } }  */
Eugene Rozenfeld via Gcc-patches July 21, 2021, 8:22 a.m. | #3
On Wed, Jul 21, 2021 at 5:05 AM Hongtao Liu <crazylht@gmail.com> wrote:
>

> On Tue, Jul 20, 2021 at 9:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:

> >

> > On Tue, Jul 20, 2021 at 2:33 PM liuhongt <hongtao.liu@intel.com> wrote:

> > >

> > > Hi:

> > >   As mention in https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575420.html

> > >

> > > ----cut start-----

> > > > note for the lowpart we can just view-convert away the excess bits,

> > > > fully re-using the mask.  We generate surprisingly "good" code:

> > > >

> > > >         kmovb   %k1, %edi

> > > >         shrb    $4, %dil

> > > >         kmovb   %edi, %k2

> > > >

> > > > besides the lack of using kshiftrb.  I guess we're just lacking

> > > > a mask register alternative for

> > > Yes, we can do it similar as kor/kand/kxor.

> > > ---cut end--------

> > >

> > >   Bootstrap and regtested on x86_64-linux-gnu{-m32,}.

> > >   Ok for trunk?

> > >

> > > gcc/ChangeLog:

> > >

> > >         * config/i386/constraints.md (Wb): New constraint.

> > >         (Ww): Ditto.

> > >         * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask

> > >         shift.

> > >         (*ashlqi3_1): Ditto.

> > >         (*<insn><mode>3_1): Ditto.

> > >         (*<insn><mode>3_1): Ditto.

> > >         * config/i386/sse.md (k<code><mode>): New define_split after

> > >         it to convert generic shift pattern to mask shift ones.

> > >

> > > gcc/testsuite/ChangeLog:

> > >

> > >         * gcc.target/i386/mask-shift.c: New test.



+(define_insn "*lshr<mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")
+    (lshiftrt:SWI12
+      (match_operand:SWI12 1 "nonimmediate_operand" "0, k")
+      (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"

Also split this one to QImode and HImode to avoid conditions in isa attribute.

OK with this change.

Thanks,
Uros.
Eugene Rozenfeld via Gcc-patches July 22, 2021, 1:32 a.m. | #4
>-----Original Message-----

>From: Uros Bizjak <ubizjak@gmail.com>

>Sent: Wednesday, July 21, 2021 4:23 PM

>To: Hongtao Liu <crazylht@gmail.com>

>Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org; H. J. Lu

><hjl.tools@gmail.com>; Richard Biener <rguenther@suse.de>

>Subject: Re: [PATCH] Support logic shift left/right for avx512 mask type.

>

>On Wed, Jul 21, 2021 at 5:05 AM Hongtao Liu <crazylht@gmail.com> wrote:

>>

>> On Tue, Jul 20, 2021 at 9:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:

>> >

>> > On Tue, Jul 20, 2021 at 2:33 PM liuhongt <hongtao.liu@intel.com> wrote:

>> > >

>> > > Hi:

>> > >   As mention in

>> > > https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575420.html

>> > >

>> > > ----cut start-----

>> > > > note for the lowpart we can just view-convert away the excess

>> > > > bits, fully re-using the mask.  We generate surprisingly "good" code:

>> > > >

>> > > >         kmovb   %k1, %edi

>> > > >         shrb    $4, %dil

>> > > >         kmovb   %edi, %k2

>> > > >

>> > > > besides the lack of using kshiftrb.  I guess we're just lacking

>> > > > a mask register alternative for

>> > > Yes, we can do it similar as kor/kand/kxor.

>> > > ---cut end--------

>> > >

>> > >   Bootstrap and regtested on x86_64-linux-gnu{-m32,}.

>> > >   Ok for trunk?

>> > >

>> > > gcc/ChangeLog:

>> > >

>> > >         * config/i386/constraints.md (Wb): New constraint.

>> > >         (Ww): Ditto.

>> > >         * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask

>> > >         shift.

>> > >         (*ashlqi3_1): Ditto.

>> > >         (*<insn><mode>3_1): Ditto.

>> > >         (*<insn><mode>3_1): Ditto.

>> > >         * config/i386/sse.md (k<code><mode>): New define_split after

>> > >         it to convert generic shift pattern to mask shift ones.

>> > >

>> > > gcc/testsuite/ChangeLog:

>> > >

>> > >         * gcc.target/i386/mask-shift.c: New test.

>

>

>+(define_insn "*lshr<mode>3_1"

>+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")

>+    (lshiftrt:SWI12

>+      (match_operand:SWI12 1 "nonimmediate_operand" "0, k")

>+      (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))

>+   (clobber (reg:CC FLAGS_REG))]

>+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"

>

>Also split this one to QImode and HImode to avoid conditions in isa attribute.

>

>OK with this change.

>


Thanks for the review, here's the patch I'm check in.

>Thanks,

>Uros.
Richard Biener July 22, 2021, 6:38 a.m. | #5
On Thu, 22 Jul 2021, Liu, Hongtao wrote:

> 

> 

> >-----Original Message-----

> >From: Uros Bizjak <ubizjak@gmail.com>

> >Sent: Wednesday, July 21, 2021 4:23 PM

> >To: Hongtao Liu <crazylht@gmail.com>

> >Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org; H. J. Lu

> ><hjl.tools@gmail.com>; Richard Biener <rguenther@suse.de>

> >Subject: Re: [PATCH] Support logic shift left/right for avx512 mask type.

> >

> >On Wed, Jul 21, 2021 at 5:05 AM Hongtao Liu <crazylht@gmail.com> wrote:

> >>

> >> On Tue, Jul 20, 2021 at 9:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:

> >> >

> >> > On Tue, Jul 20, 2021 at 2:33 PM liuhongt <hongtao.liu@intel.com> wrote:

> >> > >

> >> > > Hi:

> >> > >   As mention in

> >> > > https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575420.html

> >> > >

> >> > > ----cut start-----

> >> > > > note for the lowpart we can just view-convert away the excess

> >> > > > bits, fully re-using the mask.  We generate surprisingly "good" code:

> >> > > >

> >> > > >         kmovb   %k1, %edi

> >> > > >         shrb    $4, %dil

> >> > > >         kmovb   %edi, %k2

> >> > > >

> >> > > > besides the lack of using kshiftrb.  I guess we're just lacking

> >> > > > a mask register alternative for

> >> > > Yes, we can do it similar as kor/kand/kxor.

> >> > > ---cut end--------

> >> > >

> >> > >   Bootstrap and regtested on x86_64-linux-gnu{-m32,}.

> >> > >   Ok for trunk?

> >> > >

> >> > > gcc/ChangeLog:

> >> > >

> >> > >         * config/i386/constraints.md (Wb): New constraint.

> >> > >         (Ww): Ditto.

> >> > >         * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask

> >> > >         shift.

> >> > >         (*ashlqi3_1): Ditto.

> >> > >         (*<insn><mode>3_1): Ditto.

> >> > >         (*<insn><mode>3_1): Ditto.

> >> > >         * config/i386/sse.md (k<code><mode>): New define_split after

> >> > >         it to convert generic shift pattern to mask shift ones.

> >> > >

> >> > > gcc/testsuite/ChangeLog:

> >> > >

> >> > >         * gcc.target/i386/mask-shift.c: New test.

> >

> >

> >+(define_insn "*lshr<mode>3_1"

> >+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")

> >+    (lshiftrt:SWI12

> >+      (match_operand:SWI12 1 "nonimmediate_operand" "0, k")

> >+      (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))

> >+   (clobber (reg:CC FLAGS_REG))]

> >+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"

> >

> >Also split this one to QImode and HImode to avoid conditions in isa attribute.

> >

> >OK with this change.

> >

> 

> Thanks for the review, here's the patch I'm check in.


Works with my experimental patches, thanks!

Richard.

Patch

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 485e3f5b2cf..4aa28a5621c 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -222,6 +222,16 @@  (define_constraint "BC"
 	    (match_operand 0 "vector_all_ones_operand"))))
 
 ;; Integer constant constraints.
+(define_constraint "Wb"
+  "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 0, 7)")))
+
+(define_constraint "Ww"
+  "Integer constant in the range 0 @dots{} 15, for 16-bit shifts."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 0, 15)")))
+
 (define_constraint "I"
   "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
   (and (match_code "const_int")
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8b809c49fe0..c5f9bd4d4d8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1136,6 +1136,7 @@  (define_mode_attr di [(SI "nF") (DI "Wd")])
 
 ;; Immediate operand constraint for shifts.
 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
 
 ;; Print register name in the specified mode.
 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
@@ -11088,9 +11089,9 @@  (define_insn "*bmi2_ashl<mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
@@ -11098,6 +11099,7 @@  (define_insn "*ashl<mode>3_1"
     {
     case TYPE_LEA:
     case TYPE_ISHIFTX:
+    case TYPE_MSKLOG:
       return "#";
 
     case TYPE_ALU:
@@ -11113,7 +11115,11 @@  (define_insn "*ashl<mode>3_1"
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,bmi2")
+  [(set_attr "isa" "*,*,bmi2,avx512bw")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
@@ -11123,6 +11129,8 @@  (define_insn "*ashl<mode>3_1"
 		      (match_operand 0 "register_operand"))
 		 (match_operand 2 "const1_operand"))
 	      (const_string "alu")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
 	   ]
 	   (const_string "ishift")))
    (set (attr "length_immediate")
@@ -11218,15 +11226,16 @@  (define_split
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*ashlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")
-	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
+	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_MSKLOG:
       return "#";
 
     case TYPE_ALU:
@@ -11241,9 +11246,12 @@  (define_insn "*ashlhi3_1"
 	return "sal{w}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,avx512f")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "msklog")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
 		      (match_operand 0 "register_operand"))
 		 (match_operand 2 "const1_operand"))
@@ -11259,18 +11270,19 @@  (define_insn "*ashlhi3_1"
 			   (match_test "optimize_function_for_size_p (cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "HI,SI")])
+   (set_attr "mode" "HI,SI,HI")])
 
 (define_insn "*ashlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
-	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
-		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
+	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
+		   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_MSKLOG:
       return "#";
 
     case TYPE_ALU:
@@ -11298,9 +11307,12 @@  (define_insn "*ashlqi3_1"
 	}
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,avx512dq")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "2")
 	      (const_string "lea")
+	    (eq_attr "alternative" "3")
+	      (const_string "msklog")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
 		      (match_operand 0 "register_operand"))
 		 (match_operand 2 "const1_operand"))
@@ -11316,7 +11334,7 @@  (define_insn "*ashlqi3_1"
 			   (match_test "optimize_function_for_size_p (cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI,SI,SI")
+   (set_attr "mode" "QI,SI,SI,QI")
    ;; Potential partial reg stall on alternative 1.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "1")
@@ -11819,16 +11837,17 @@  (define_insn "*bmi2_<insn><mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<insn><mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
 	(any_shiftrt:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
+    case TYPE_MSKLOG:
       return "#";
 
     default:
@@ -11839,11 +11858,16 @@  (define_insn "*<insn><mode>3_1"
 	return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "ishift,ishiftx")
+  [(set_attr "isa" "*,bmi2,avx512bw")
+   (set_attr "type" "ishift,ishiftx,msklog")
+   (set (attr "enabled")
+	(if_then_else (eq_attr "alternative" "2")
+	  (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW")
+	  (const_string "*")))
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand")
+       (and (and (match_operand 2 "const1_operand")
+		 (eq_attr "alternative" "0"))
 	    (ior (match_test "TARGET_SHIFT1")
 		 (match_test "optimize_function_for_size_p (cfun)")))
        (const_string "0")
@@ -11916,27 +11940,41 @@  (define_split
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*<insn><mode>3_1"
-  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, ?k")
 	(any_shiftrt:SWI12
-	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0, k")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>, <KS>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shift>{<imodesuffix>}\t%0";
-  else
-    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFT:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shift>{<imodesuffix>}\t%0";
+      else
+	return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    case TYPE_MSKLOG:
+      return "#";
+    default:
+      gcc_unreachable ();
+    }
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "type" "ishift,msklog")
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand")
+       (and (and (match_operand 2 "const1_operand")
+		 (eq_attr "alternative" "0"))
 	    (ior (match_test "TARGET_SHIFT1")
 		 (match_test "optimize_function_for_size_p (cfun)")))
        (const_string "0")
        (const_string "*")))
+   (set (attr "enabled")
+	(if_then_else (eq_attr "alternative" "1")
+	  (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512F
+		       && (<MODE>mode != QImode || TARGET_AVX512DQ)")
+	  (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<insn><mode>3_1_slp"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ab29999023d..f8759e4d758 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1755,6 +1755,20 @@  (define_insn "k<code><mode>"
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
 
+(define_split
+  [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+	(any_lshift:SWI1248_AVX512BW
+	  (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+	  (match_operand 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512F && reload_completed"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_lshift:SWI1248_AVX512BW
+	     (match_dup 1)
+	     (match_dup 2)))
+      (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
 (define_insn "ktest<mode>"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC
diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c
new file mode 100644
index 00000000000..4cb6ef37821
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mask-shift.c
@@ -0,0 +1,83 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512dq -O2" } */
+
+#include<immintrin.h>
+void
+fooq (__m512i a, __m512i b, void* p)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  m1 >>= 4;
+  _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } }  */
+
+void
+food (__m512i a, __m512i b, void* p)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  m1 >>= 8;
+  _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } }  */
+
+void
+foow (__m512i a, __m512i b, void* p)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  m1 >>= 16;
+  _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } }  */
+
+void
+foob (__m512i a, __m512i b, void* p)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  m1 >>= 32;
+  _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } }  */
+
+void
+fooq1 (__m512i a, __m512i b, void* p)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  m1 <<= 4;
+  _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } }  */
+
+void
+food1 (__m512i a, __m512i b, void* p)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  m1 <<= 8;
+  _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } }  */
+
+void
+foow1 (__m512i a, __m512i b, void* p)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  m1 <<= 16;
+  _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } }  */
+
+void
+foob1 (__m512i a, __m512i b, void* p)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  m1 <<= 32;
+  _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } }  */