[i386] : Remove support to STV variable DImode shifts (PR 89902, PR89903)

Message ID CAFULd4a49h_0rAvmShuq0OjaaHb1hy_gUTbpmRboTrzZukeiJg@mail.gmail.com
State New
Headers show
Series
  • [i386] : Remove support to STV variable DImode shifts (PR 89902, PR89903)
Related show

Commit Message

Uros Bizjak April 2, 2019, 8:15 a.m.
Hello!

As exposed by corner cases in PR 89902 and PR 89903, we can't always
reliably convert variable DImode shifts from a scalar to a vector
instruction. The problem is in count register of a SSE vector shift,
where the full DImode value is considered as a shift argument. Scalar
operations with general register set use size-masked value of a QImode
count register, so we have to zero-extend count operand from QImode to
DImode if we want to fully emulate scalar instruction with a vector
instruction.
The STV infrastructure does not correctly handle corner cases where
shift operand and count operand are matched, e.g.: "psllq %xmm0,
%xmm0", it substitutes every instance with its V2DImode subreg
representation. It can happen that a DImode result of a previous
vector operation is passed as a count operand without clearing high
bits outside QImode subreg, leading to runtime failures (n.b.: SSE
shifts don't mask count operand).

Attached patch removes unreliable support to STV variable DImode shifts.

2019-04-02  UroŇ° Bizjak  <ubizjak@gmail.com>

    PR target/89902
    PR target/89903
    * config/i386/i386.c (dimode_scalar_to_vector_candidate_p):
    Return false for variable DImode shifts.
    (dimode_scalar_chain::compute_convert_gain): Do not handle
    register count operand in variable DImode shifts.
    (dimode_scalar_chain::make_vector_copies): Remove support to copy
    count argument of a variable shift instruction to a vector register.
    (dimode_scalar_chain::convert_reg): Remove support to convert
    count argument of a variable shift instruction.

testsuite/ChangeLog:

2019-04-02  UroŇ° Bizjak  <ubizjak@gmail.com>

    PR target/89902
    PR target/89903
    * gcc.target/i386/pr70799-4.c: Remove.
    * gcc.target/i386/pr70799-5.c: Remove.
    * gcc.target/i386/pr89902.c: New test.
    * gcc.target/i386/pr89903.c: Ditto.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

I plan to commit the patch later today to mainline and gcc-8 branch.

Uros.

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6926085fdd2f..a7544946e0ad 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1058,16 +1058,8 @@  dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
 
     case ASHIFT:
     case LSHIFTRT:
-      if (!REG_P (XEXP (src, 1))
-	  && (!SUBREG_P (XEXP (src, 1))
-	      || SUBREG_BYTE (XEXP (src, 1)) != 0
-	      || !REG_P (SUBREG_REG (XEXP (src, 1))))
-	  && (!CONST_INT_P (XEXP (src, 1))
-	      || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
-	return false;
-
-      if (GET_MODE (XEXP (src, 1)) != QImode
-	  && !CONST_INT_P (XEXP (src, 1)))
+      if (!CONST_INT_P (XEXP (src, 1))
+	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63))
 	return false;
       break;
 
@@ -1664,15 +1656,10 @@  dimode_scalar_chain::compute_convert_gain ()
 	{
     	  if (CONST_INT_P (XEXP (src, 0)))
 	    gain -= vector_const_cost (XEXP (src, 0));
-	  if (CONST_INT_P (XEXP (src, 1)))
-	    {
-	      gain += ix86_cost->shift_const;
-	      if (INTVAL (XEXP (src, 1)) >= 32)
-		gain -= COSTS_N_INSNS (1);
-	    }
-	  else
-	    /* Additional gain for omitting two CMOVs.  */
-	    gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
+
+	  gain += ix86_cost->shift_const;
+	  if (INTVAL (XEXP (src, 1)) >= 32)
+	    gain -= COSTS_N_INSNS (1);
 	}
       else if (GET_CODE (src) == PLUS
 	       || GET_CODE (src) == MINUS
@@ -1788,60 +1775,14 @@  dimode_scalar_chain::make_vector_copies (unsigned regno)
 {
   rtx reg = regno_reg_rtx[regno];
   rtx vreg = gen_reg_rtx (DImode);
-  bool count_reg = false;
   df_ref ref;
 
   for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
       {
-	df_ref use;
-
-	/* Detect the count register of a shift instruction.  */
-	for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
-	  if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
-	    {
-	      rtx_insn *insn = DF_REF_INSN (use);
-	      rtx def_set = single_set (insn);
-
-	      gcc_assert (def_set);
-
-	      rtx src = SET_SRC (def_set);
-
-	      if ((GET_CODE (src) == ASHIFT
-		   || GET_CODE (src) == ASHIFTRT
-		   || GET_CODE (src) == LSHIFTRT)
-		  && !CONST_INT_P (XEXP (src, 1))
-		  && reg_or_subregno (XEXP (src, 1)) == regno)
-		count_reg = true;
-	    }
-
 	start_sequence ();
-	if (count_reg)
-	  {
-	    rtx qreg = gen_lowpart (QImode, reg);
-	    rtx tmp = gen_reg_rtx (SImode);
 
-	    if (TARGET_ZERO_EXTEND_WITH_AND
-		&& optimize_function_for_speed_p (cfun))
-	      {
-		emit_move_insn (tmp, const0_rtx);
-		emit_insn (gen_movstrictqi
-			   (gen_lowpart (QImode, tmp), qreg));
-	      }
-	    else
-	      emit_insn (gen_rtx_SET
-			 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
-
-	    if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
-	      {
-		rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
-		emit_move_insn (slot, tmp);
-		tmp = copy_rtx (slot);
-	      }
-
-	    emit_insn (gen_zero_extendsidi2 (vreg, tmp));
-	  }
-	else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
+	if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
 	  {
 	    rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
 	    emit_move_insn (adjust_address (tmp, SImode, 0),
@@ -1889,25 +1830,8 @@  dimode_scalar_chain::make_vector_copies (unsigned regno)
     if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
       {
 	rtx_insn *insn = DF_REF_INSN (ref);
-	if (count_reg)
-	  {
-	    rtx def_set = single_set (insn);
-	    gcc_assert (def_set);
 
-	    rtx src = SET_SRC (def_set);
-
-	    if ((GET_CODE (src) == ASHIFT
-		 || GET_CODE (src) == ASHIFTRT
-		 || GET_CODE (src) == LSHIFTRT)
-		&& !CONST_INT_P (XEXP (src, 1))
-		&& reg_or_subregno (XEXP (src, 1)) == regno)
-	      {
-		XEXP (src, 0) = replace_with_subreg (XEXP (src, 0), reg, reg);
-		XEXP (src, 1) = vreg;
-	      }
-	  }
-	else
-	  replace_with_subreg_in_insn (insn, reg, vreg);
+	replace_with_subreg_in_insn (insn, reg, vreg);
 
 	if (dump_file)
 	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
@@ -2010,43 +1934,7 @@  dimode_scalar_chain::convert_reg (unsigned regno)
 	    rtx src = SET_SRC (def_set);
 	    rtx dst = SET_DEST (def_set);
 
-	    if ((GET_CODE (src) == ASHIFT
-		 || GET_CODE (src) == ASHIFTRT
-		 || GET_CODE (src) == LSHIFTRT)
-		&& !CONST_INT_P (XEXP (src, 1))
-		&& reg_or_subregno (XEXP (src, 1)) == regno)
-	      {
-		rtx tmp2 = gen_reg_rtx (V2DImode);
-
-		start_sequence ();
-
-		if (TARGET_SSE4_1)
-		  emit_insn (gen_sse4_1_zero_extendv2qiv2di2
-			     (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
-		else
-		  {
-		    rtx vec_cst
-		      = gen_rtx_CONST_VECTOR (V2DImode,
-					      gen_rtvec (2, GEN_INT (0xff),
-							 const0_rtx));
-		    vec_cst
-		      = validize_mem (force_const_mem (V2DImode, vec_cst));
-
-		    emit_insn (gen_rtx_SET
-			       (tmp2,
-				gen_rtx_AND (V2DImode,
-					     gen_rtx_SUBREG (V2DImode, reg, 0),
-					     vec_cst)));
-		  }
-		rtx_insn *seq = get_insns ();
-		end_sequence ();
-
-		emit_insn_before (seq, insn);
-
-		XEXP (src, 0) = replace_with_subreg (XEXP (src, 0), reg, reg);
-		XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
-	      }
-	    else if (!MEM_P (dst) || !REG_P (src))
+	    if (!MEM_P (dst) || !REG_P (src))
 	      replace_with_subreg_in_insn (insn, reg, reg);
 
 	    bitmap_clear_bit (conv, INSN_UID (insn));