i386: Implement mmx_pblendv to optimize SSE conditional moves [PR98218]

Message ID CAFULd4Ztko2sU-+hNY4Nsx5+F6Dr5TbhUL3Hxcjx7MDiznrQaw@mail.gmail.com
State New
Headers show
Series
  • i386: Implement mmx_pblendv to optimize SSE conditional moves [PR98218]
Related show

Commit Message

Martin Sebor via Gcc-patches May 7, 2021, 3:18 p.m.
Implement mmx_pblendv to optimize V8QI, V4HI and V2SI mode
conditional moves for SSE4.1 targets.

2021-05-07  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/98218
    * config/i386/i386-expand.c (ix86_expand_sse_movcc):
    Handle V8QI, V4HI and V2SI modes.
    * config/i386/mmx.md (mmx_pblendvb): New insn pattern.
    * config/i386/sse.md (unspec): Move UNSPEC_BLENDV ...
    * config/i386/i386.md (unspec): ... here.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 61b2f921f41..e9f11bca78a 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3702,6 +3702,19 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  op_true = force_reg (mode, op_true);
 	}
       break;
+    case E_V8QImode:
+    case E_V4HImode:
+    case E_V2SImode:
+      if (TARGET_SSE4_1)
+	{
+	  gen = gen_mmx_pblendvb;
+	  if (mode != V8QImode)
+	    d = gen_reg_rtx (V8QImode);
+	  op_false = gen_lowpart (V8QImode, op_false);
+	  op_true = gen_lowpart (V8QImode, op_true);
+	  cmp = gen_lowpart (V8QImode, cmp);
+	}
+      break;
     case E_V16QImode:
     case E_V8HImode:
     case E_V4SImode:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f79fd122f56..74e924f3c04 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -118,6 +118,7 @@  (define_c_enum "unspec" [
   UNSPEC_FIX_NOTRUNC
   UNSPEC_MASKMOV
   UNSPEC_MOVMSK
+  UNSPEC_BLENDV
   UNSPEC_RCP
   UNSPEC_RSQRT
   UNSPEC_PSADBW
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 295501dec2f..f08570856f9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1700,6 +1700,26 @@  (define_expand "vcond_mask_<mode><mode>"
   DONE;
 })
 
+(define_insn "mmx_pblendvb"
+  [(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x")
+	(unspec:V8QI
+	  [(match_operand:V8QI 1 "register_operand" "0,0,x")
+	   (match_operand:V8QI 2 "register_operand" "Yr,*x,x")
+	   (match_operand:V8QI 3 "register_operand" "Yz,Yz,x")]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+  "@
+   pblendvb\t{%3, %2, %0|%0, %2, %3}
+   pblendvb\t{%3, %2, %0|%0, %2, %3}
+   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "*,*,1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "TI")])
+
 ;; XOP parallel XMM conditional moves
 (define_insn "*xop_pcmov_<mode>"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=x")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 897cf3eaea9..244fb13e97a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -39,7 +39,6 @@  (define_c_enum "unspec" [
   UNSPEC_INSERTQ
 
   ;; For SSE4.1 support
-  UNSPEC_BLENDV
   UNSPEC_INSERTPS
   UNSPEC_DP
   UNSPEC_MOVNTDQA