[2/5] MSP430: Implement TARGET_RTX_COSTS

Message ID 20200723154959.6hszuzththygc2tn@jozef-acer-manjaro
State New
Headers show
Series
  • MSP430: Implement macros to describe relative costs of operations
Related show

Commit Message

Jozef Lawrynowicz July 23, 2020, 3:49 p.m.
Costs of MSP430 instructions are mostly just a function of the type and number
of operands. In these cases, TARGET_RTX_COSTS just needs to examine the
operands to calculate the cost of the expression.

For more complicated operations where library helper functions are required,
if the cost cannot be accurately calculated, it is estimated and
disparaged relative to the cost of a single instruction.
From f5cbd8967d9c64a4ea6eb9fb8846b4361e16e396 Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz <jozef.l@mittosystems.com>

Date: Thu, 16 Jul 2020 11:28:59 +0100
Subject: [PATCH 2/5] MSP430: Implement TARGET_RTX_COSTS

Costs of MSP430 instructions are mostly just a function of the type and number
of operands. In these cases, TARGET_RTX_COSTS just needs to examine the
operands to calculate the cost of the expression.

For more complicated operations where library helper functions are required,
if the cost cannot be accurately calculated, it is estimated and
disparaged relative to the cost of a single instruction.

gcc/ChangeLog:

	* config/msp430/msp430.c (use_helper_for_const_shift): Add forward
	declaration.
	Remove unused argument.
	(struct msp430_multlib_costs): New struct.
	(msp430_is_mem_indirect): New function.
	(msp430_costs): Likewise.
	(msp430_shift_costs): Likewise.
	(msp430_muldiv_costs): Likewise.
	(msp430_get_inner_dest_code): Likewise.
	(msp430_single_op_cost): Likewise.
	(msp430_rtx_costs): Rewrite from scratch.
	(msp430_expand_shift): Adjust use_helper_for_const_shift call.
---
 gcc/config/msp430/msp430.c | 545 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 530 insertions(+), 15 deletions(-)

-- 
2.27.0

Patch

diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c
index 9e739233fa0..81ee5075a57 100644
--- a/gcc/config/msp430/msp430.c
+++ b/gcc/config/msp430/msp430.c
@@ -49,6 +49,9 @@ 
 #include "msp430-devices.h"
 #include "incpath.h"
 #include "prefix.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -56,6 +59,7 @@ 
 
 static void msp430_compute_frame_info (void);
 static bool use_32bit_hwmult (void);
+static bool use_helper_for_const_shift (machine_mode mode, HOST_WIDE_INT amt);
 
 
 
@@ -1132,6 +1136,28 @@  static const struct double_op_cost size_cost_double_op =
   2, 2, 3
 };
 
+struct msp430_multlib_costs
+{
+  const int mulhi;
+  const int mulsi;
+  const int muldi;
+};
+
+/* There is no precise size cost when using libcalls, instead it is disparaged
+   relative to other instructions.
+   The cycle costs are from the CALL to the RET, inclusive.
+   FIXME muldi cost is not accurate.  */
+static const struct msp430_multlib_costs cycle_cost_multlib_32bit =
+{
+  27, 33, 66
+};
+
+/* 32bit multiply takes a few more instructions on 16bit hwmult.  */
+static const struct msp430_multlib_costs cycle_cost_multlib_16bit =
+{
+  27, 42, 66
+};
+
 /* TARGET_REGISTER_MOVE_COST
    There is only one class of general-purpose, non-fixed registers, and the
    relative cost of moving data between them is always the same.
@@ -1174,29 +1200,516 @@  msp430_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
    because there are no conditional move insns - when a condition is involved,
    the only option is to use a cbranch.  */
 
+/* For X, which must be a MEM RTX, return TRUE if it is an indirect memory
+   reference, @Rn or @Rn+.  */
+static bool
+msp430_is_mem_indirect (rtx x)
+{
+  gcc_assert (GET_CODE (x) == MEM);
+  rtx op0 = XEXP (x, 0);
+  return (GET_CODE (op0) == REG || GET_CODE (op0) == POST_INC);
+}
+
+/* Costs of MSP430 instructions are generally based on the addressing mode
+   combination of the source and destination operands.
+   Given source operand SRC (which may be NULL to indicate a single-operand
+   instruction) and destination operand DST return the cost of this
+   expression.  */
+static int
+msp430_costs (rtx src, rtx dst, bool speed, rtx outer_rtx)
+{
+  enum rtx_code src_code = GET_CODE (src);
+  enum rtx_code dst_code = GET_CODE (dst);
+  enum rtx_code outer_code = GET_CODE (outer_rtx);
+  machine_mode outer_mode = GET_MODE (outer_rtx);
+  const struct double_op_cost *cost_p;
+  cost_p = (speed ? &cycle_cost_double_op : &size_cost_double_op);
+
+  if (outer_code == TRUNCATE
+      && (outer_mode == QImode
+	  || outer_mode == HImode
+	  || outer_mode == PSImode))
+    /* Truncation to these modes is normally free as a side effect of the
+       instructions themselves.  */
+    return 0;
+
+  if (dst_code == SYMBOL_REF
+      || dst_code == LABEL_REF
+      || dst_code == CONST_INT)
+    /* Catch RTX like (minus (const_int 0) (reg)) but don't add any cost.  */
+    return 0;
+
+  if (debug_rtx_costs && dst_code != REG && dst_code != MEM && dst_code != PC)
+    {
+      fprintf (stderr, "msp430_costs unhandled dst operand:\n");
+      debug_rtx (dst);
+      fprintf (stderr, "from:\n");
+      debug_rtx (outer_rtx);
+    }
+
+  switch (src_code)
+    {
+    case REG:
+      return (dst_code == REG ? cost_p->r2r
+	      : (dst_code == PC ? cost_p->r2pc : cost_p->r2m));
+
+    case CONST_INT:
+    case SYMBOL_REF:
+    case LABEL_REF:
+    case CONST:
+      return (dst_code == REG ? cost_p->imm2r
+	      : (dst_code == PC ? cost_p->imm2pc : cost_p->imm2m));
+
+
+    case MEM:
+      if (msp430_is_mem_indirect (src))
+	return (dst_code == REG ? cost_p->ind2r : (dst_code == PC
+						   ? cost_p->ind2pc
+						   : cost_p->ind2m));
+      else
+	return (dst_code == REG ? cost_p->mem2r	: (dst_code == PC
+						   ? cost_p->mem2pc
+						   : cost_p->mem2m));
+    default:
+      if (debug_rtx_costs)
+	{
+	  fprintf (stderr, "msp430_costs unhandled src operand:\n");
+	  debug_rtx (src);
+	  fprintf (stderr, "from:\n");
+	  debug_rtx (outer_rtx);
+	}
+      return cost_p->mem2m;
+    }
+}
+
+/* Given source operand SRC and destination operand DST from the shift or
+   rotate RTX OUTER_RTX, return the cost of performing that shift, assuming
+   optimization for speed when SPEED is true.  */
+static int
+msp430_shift_costs (rtx src, rtx dst, bool speed, rtx outer_rtx)
+{
+  int amt;
+  enum rtx_code src_code = GET_CODE (src);
+  enum rtx_code dst_code = GET_CODE (dst);
+  const struct single_op_cost *cost_p;
+
+  cost_p = (speed ? &cycle_cost_single_op : &size_cost_single_op);
+
+  if (debug_rtx_costs
+      && dst_code != REG
+      && dst_code != MEM
+      && dst_code != CONST
+      && dst_code != SYMBOL_REF
+      && dst_code != CONST_INT)
+    {
+      fprintf (stderr, "msp430_shift_costs unhandled dst operand:\n");
+      debug_rtx (dst);
+    }
+
+  if (debug_rtx_costs
+      && src_code != CONST_INT
+      && src_code != REG)
+    {
+      fprintf (stderr, "msp430_shift_costs unhandled src operand:\n");
+      debug_rtx (src);
+    }
+
+  if (src_code != CONST_INT)
+    /* The size or speed cost when the shift amount is unknown cannot be
+       accurately calculated, so just disparage it slightly.  */
+    return 2 * msp430_costs (src, dst, speed, outer_rtx);
+
+  if (use_helper_for_const_shift (GET_MODE (outer_rtx), amt = INTVAL (src)))
+    {
+      /* GCC sometimes tries to perform shifts in some very inventive ways,
+	 resulting in much larger code size usage than necessary, if
+	 they are disparaged too much here.  So in general, if
+	 use_helper_for_const_shift thinks a helper should be used, obey
+	 that and don't disparage the shift any more than a regular
+	 instruction, even though the shift may actually cost more.
+	 This ensures that the RTL generated at the initial expand pass has the
+	 expected shift instructions, which can be mapped to the helper
+	 functions.  */
+      return msp430_costs (src, dst, speed, outer_rtx);
+    }
+
+  if (!msp430x)
+    {
+      /* Each shift by one place will be emitted individually.  */
+      switch (dst_code)
+	{
+	case REG:
+	case CONST_INT:
+	  return amt * cost_p->reg;
+	case MEM:
+	  if (msp430_is_mem_indirect (dst))
+	    return amt * cost_p->ind;
+	  else
+	    return amt * cost_p->mem;
+	default:
+	  return amt * cost_p->mem;
+	}
+    }
+
+  /* RRAM, RRCM, RRUM, RLAM are used for shift counts <= 4, otherwise, the 'X'
+     versions are used.
+     Instructions which shift a MEM operand will never actually be output.  It
+     will always be copied into a register to allow for efficient shifting.  So
+     the cost just takes into account the cost of an additional copy in that
+     case.  */
+  return (amt <= 4 ? (speed ? amt : 1) : (speed ? amt + 1 : 2)
+	  + (dst_code == REG ? 0
+	     : msp430_costs (dst, gen_rtx_REG (HImode, 10), speed, outer_rtx)));
+}
+
+/* Given source operand SRC and destination operand DST from the MULT/DIV/MOD
+   RTX OUTER_RTX, return the cost of performing that operation, assuming
+   optimization for speed when SPEED is true.  */
+static int
+msp430_muldiv_costs (rtx src, rtx dst, bool speed, rtx outer_rtx,
+		     machine_mode outer_mode)
+{
+  enum rtx_code outer_code = GET_CODE (outer_rtx);
+  const struct msp430_multlib_costs *cost_p;
+  bool hwmult_16bit = (msp430_has_hwmult () && !(msp430_use_f5_series_hwmult ()
+						 || use_32bit_hwmult ()));
+  cost_p = (hwmult_16bit
+	    ? &cycle_cost_multlib_32bit
+	    : &cycle_cost_multlib_16bit);
+
+  int factor = 1;
+  /* Only used in some calculations.  */
+  int mode_factor = 1;
+  if (outer_mode == SImode)
+    mode_factor = 2;
+  else if (outer_mode == PSImode)
+    /* PSImode multiplication is performed using SImode operands, so has extra
+       cost to factor in the conversions necessary before/after the
+       operation.  */
+    mode_factor = 3;
+  else if (outer_mode == DImode)
+    mode_factor = 4;
+
+  if (!speed)
+    {
+      /* The codesize cost of using a helper function to perform the
+	 multiplication or division cannot be accurately calculated, since the
+	 cost depends on how many times the operation is performed in the
+	 entire program.  */
+      if (outer_code != MULT)
+	/* Division is always expensive.  */
+	factor = 7;
+      else if (((hwmult_16bit && outer_mode != DImode)
+		   || use_32bit_hwmult () || msp430_use_f5_series_hwmult ()))
+	/* When the hardware multiplier is available, only disparage
+	   slightly.  */
+	factor = 2;
+      else
+	factor = 5;
+      return factor * mode_factor * msp430_costs (src, dst, speed, outer_rtx);
+    }
+
+  /* When there is hardware multiply support, there is a relatively low, fixed
+     cycle cost to performing any multiplication, but when there is no hardware
+     multiply support it is very costly.  That precise cycle cost has not been
+     calculated here.
+     Division is extra slow since it always uses a software library.
+     The 16-bit hardware multiply library cannot be used to produce 64-bit
+     results.  */
+  if (outer_code != MULT || !msp430_has_hwmult ()
+      || (outer_mode == DImode && hwmult_16bit))
+    {
+      factor = (outer_code == MULT ? 50 : 70);
+      return factor * mode_factor * msp430_costs (src, dst, speed, outer_rtx);
+    }
+
+  switch (outer_mode)
+    {
+    case E_QImode:
+    case E_HImode:
+      /* Include the cost of copying the operands into and out of the hardware
+	 multiply routine.  */
+      return cost_p->mulhi + (3 * msp430_costs (src, dst, speed, outer_rtx));
+
+    case E_PSImode:
+      /* Extra factor for the conversions necessary to do PSI->SI before the
+	 operation.  */
+      factor = 2;
+      /* fallthru.  */
+    case E_SImode:
+      return factor * (cost_p->mulsi
+		       + (6 * msp430_costs (src, dst, speed, outer_rtx)));
+
+    case E_DImode:
+    default:
+      return cost_p->muldi + (12 * msp430_costs (src, dst, speed, outer_rtx));
+    }
+}
+
+/* Recurse within X to find the actual destination operand of the expression.
+   For example:
+   (plus (ashift (minus (ashift (reg)
+   (const_int) ......
+   should return the reg RTX.  */
+static rtx
+msp430_get_inner_dest_code (rtx x)
+{
+  enum rtx_code code = GET_CODE (x);
+  rtx op0 = XEXP (x, 0);
+  switch (code)
+    {
+    case REG:
+    case SYMBOL_REF:
+    case CONST_INT:
+    case CONST:
+    case LABEL_REF:
+      return x;
+
+    case MEM:
+      /* Return the MEM expr not the inner REG for these cases.  */
+      switch (GET_CODE (op0))
+	{
+	case REG:
+	case SYMBOL_REF:
+	case LABEL_REF:
+	case CONST:
+	case POST_INC:
+	  return x;
+
+	case PLUS:
+	  /* return MEM (PLUS (REG) (CONST)) */
+	  if (GET_CODE (XEXP (op0, 0)) == REG)
+	    {
+	      if (GET_CODE (XEXP (op0, 1)) == CONST_INT
+		  || GET_CODE (XEXP (op0, 1)) == CONST
+		  || GET_CODE (XEXP (op0, 1)) == LABEL_REF
+		  || GET_CODE (XEXP (op0, 1)) == SYMBOL_REF)
+		return x;
+	      else
+		return msp430_get_inner_dest_code (op0);
+	    }
+	  return msp430_get_inner_dest_code (op0);
+
+	default:
+	  if (GET_RTX_FORMAT (code)[0] != 'e')
+	    return x;
+	  return msp430_get_inner_dest_code (op0);
+	}
+      break;
+
+    default:
+      if (op0 == NULL_RTX)
+	gcc_unreachable ();
+      else
+	{
+	  if (GET_RTX_FORMAT (code)[0] != 'e'
+	      && code != ENTRY_VALUE)
+	    return x;
+	  return msp430_get_inner_dest_code (op0);
+	}
+    }
+}
+
+/* Calculate the cost of an MSP430 single-operand instruction, for operand DST
+   within the RTX OUTER_RTX, optimizing for speed if SPEED is true.  */
+static int
+msp430_single_op_cost (rtx dst, bool speed, rtx outer_rtx)
+{
+  enum rtx_code dst_code = GET_CODE (dst);
+  const struct single_op_cost *cost_p;
+  const struct double_op_cost *double_op_cost_p;
+
+  cost_p = (speed ? &cycle_cost_single_op : &size_cost_single_op);
+  double_op_cost_p = (speed ? &cycle_cost_double_op : &size_cost_double_op);
+
+  switch (dst_code)
+    {
+    case REG:
+      return cost_p->reg;
+    case MEM:
+      if (msp430_is_mem_indirect (dst))
+	return cost_p->ind;
+      else
+	return cost_p->mem;
+
+    case CONST_INT:
+    case CONST_FIXED:
+    case CONST_DOUBLE:
+    case SYMBOL_REF:
+    case CONST:
+      /* A constant value would need to be copied into a register first.  */
+      return double_op_cost_p->imm2r + cost_p->reg;
+
+    default:
+      if (debug_rtx_costs)
+	{
+	  fprintf (stderr, "msp430_single_op_cost unhandled "
+		   "dst operand:\n");
+	  debug_rtx (dst);
+	  fprintf (stderr, "from:\n");
+	  debug_rtx (outer_rtx);
+	}
+      return cost_p->mem;
+    }
+}
+
 #undef  TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS msp430_rtx_costs
 
-static bool msp430_rtx_costs (rtx	   x ATTRIBUTE_UNUSED,
-			      machine_mode mode,
-			      int	   outer_code ATTRIBUTE_UNUSED,
-			      int	   opno ATTRIBUTE_UNUSED,
-			      int *	   total,
-			      bool	   speed ATTRIBUTE_UNUSED)
+/* This target hook describes the relative costs of RTL expressions.
+   The function recurses to just before the lowest level of the expression,
+   when both of the operands of the expression can be examined at the same time.
+   This is because the cost of the expression depends on the specific
+   addressing mode combination of the operands.
+   The hook returns true when all subexpressions of X have been processed, and
+   false when rtx_cost should recurse.  */
+static bool
+msp430_rtx_costs (rtx x,
+		  machine_mode mode,
+		  int	   outer_code ATTRIBUTE_UNUSED,
+		  int	   opno ATTRIBUTE_UNUSED,
+		  int *	   total,
+		  bool	   speed)
 {
-  int code = GET_CODE (x);
+  enum rtx_code code = GET_CODE (x);
+  rtx dst, src;
+  rtx dst_inner, src_inner;
+
+  *total = 0;
+  dst = XEXP (x, 0);
+  if (GET_RTX_LENGTH (code) == 1)
+    /* Some RTX that are single-op in GCC are double-op when translated to
+       MSP430 instructions e.g NOT, NEG, ZERO_EXTEND.  */
+    src = dst;
+  else
+    src = XEXP (x, 1);
+
 
   switch (code)
     {
+    case SET:
+      /* Ignoring SET improves codesize.  */
+      if (!speed)
+	return true;
+      /* fallthru.  */
+    case PLUS:
+      if (outer_code == MEM)
+	/* Do not add any cost for the plus itself, but recurse in case there
+	   are more complicated RTX inside.  */
+	return false;
+      /* fallthru.  */
+    case MINUS:
+    case AND:
+    case IOR:
+    case XOR:
+    case NOT:
+    case ZERO_EXTEND:
+    case TRUNCATE:
+    case NEG:
+    case ZERO_EXTRACT:
+    case SIGN_EXTRACT:
+    case IF_THEN_ELSE:
+      dst_inner = msp430_get_inner_dest_code (dst);
+      src_inner = msp430_get_inner_dest_code (src);
+      *total = COSTS_N_INSNS (msp430_costs (src_inner, dst_inner, speed, x));
+      if (mode == SImode)
+	*total *= 2;
+      if (mode == DImode)
+	*total *= 4;
+      return false;
+
+    case ROTATE:
+    case ASHIFT:
+    case ASHIFTRT:
+    case LSHIFTRT:
+      dst_inner = msp430_get_inner_dest_code (dst);
+      src_inner = msp430_get_inner_dest_code (src);
+      *total = COSTS_N_INSNS (msp430_shift_costs (src_inner, dst_inner,
+						  speed, x));
+      if (mode == SImode)
+	*total *= 2;
+      if (mode == DImode)
+	*total *= 4;
+      return false;
+
+    case MULT:
+    case DIV:
+    case MOD:
+    case UDIV:
+    case UMOD:
+      dst_inner = msp430_get_inner_dest_code (dst);
+      src_inner = msp430_get_inner_dest_code (src);
+      *total = COSTS_N_INSNS (msp430_muldiv_costs (src_inner, dst_inner, speed,
+						   x, mode));
+      return false;
+
+    case CALL:
     case SIGN_EXTEND:
-      if (mode == SImode && outer_code == SET)
+      dst_inner = msp430_get_inner_dest_code (dst);
+      *total = COSTS_N_INSNS (msp430_single_op_cost (dst_inner, speed, x));
+      if (mode == SImode)
+	*total *= 2;
+      if (mode == DImode)
+	*total *= 4;
+      return false;
+
+    case CONST_INT:
+    case CONST_FIXED:
+    case CONST_DOUBLE:
+    case SYMBOL_REF:
+    case CONST:
+    case LABEL_REF:
+    case REG:
+    case PC:
+    case POST_INC:
+      if (mode == SImode)
+	*total = COSTS_N_INSNS (2);
+      else if (mode == DImode)
+	*total = COSTS_N_INSNS (4);
+      return true;
+
+    case MEM:
+      /* PSImode operands are expensive when in memory.  */
+      if (mode == PSImode)
+	*total = COSTS_N_INSNS (1);
+      else if (mode == SImode)
+	*total = COSTS_N_INSNS (2);
+      else if (mode == DImode)
+	*total = COSTS_N_INSNS (4);
+      /* Recurse into the MEM.  */
+      return false;
+
+    case EQ:
+    case NE:
+    case GT:
+    case GTU:
+    case GE:
+    case GEU:
+    case LT:
+    case LTU:
+    case LE:
+    case LEU:
+      /* Conditions are mostly equivalent, changing their relative
+	 costs has no effect.  */
+      return false;
+
+    case ASM_OPERANDS:
+    case ASM_INPUT:
+    case CLOBBER:
+    case COMPARE:
+    case CONCAT:
+    case ENTRY_VALUE:
+      /* Other unhandled expressions.  */
+      return false;
+
+    default:
+      if (debug_rtx_costs)
 	{
-	  *total = COSTS_N_INSNS (4);
-	  return true;
+	  fprintf (stderr, "\nUnhandled RTX\n");
+	  debug_rtx (x);
 	}
-      break;
+      return false;
     }
-  return false;
 }
 
 /* Function Entry and Exit */
@@ -2915,8 +3428,7 @@  msp430_expand_helper (rtx *operands, const char *helper_name,
 /* Return TRUE if the helper function should be used and FALSE if the shifts
    insns should be emitted inline.  */
 static bool
-use_helper_for_const_shift (enum rtx_code code, machine_mode mode,
-			    HOST_WIDE_INT amt)
+use_helper_for_const_shift (machine_mode mode, HOST_WIDE_INT amt)
 {
   const int default_inline_shift = 4;
   /* We initialize the option to 65 so we know if the user set it or not.  */
@@ -2927,6 +3439,9 @@  use_helper_for_const_shift (enum rtx_code code, machine_mode mode,
      the heuristic accordingly.  */
   int max_inline_32 = max_inline / 2;
 
+  if (mode == E_DImode)
+    return true;
+
   /* Don't use helpers for these modes on 430X, when optimizing for speed, or
      when emitting a small number of insns.  */
   if ((mode == E_QImode || mode == E_HImode || mode == E_PSImode)
@@ -2964,7 +3479,7 @@  msp430_expand_shift (enum rtx_code code, machine_mode mode, rtx *operands)
      constant.  */
   if (!CONST_INT_P (operands[2])
       || mode == E_DImode
-      || use_helper_for_const_shift (code, mode, INTVAL (operands[2])))
+      || use_helper_for_const_shift (mode, INTVAL (operands[2])))
     {
       const char *helper_name = NULL;
       /* The const variants of mspabi shifts have significantly larger code