[committed] amdgcn: Support basic DWARF

Message ID 1147df6c-559b-9f26-8980-630abb198230@codesourcery.com
State New
Headers show
Series
  • [committed] amdgcn: Support basic DWARF
Related show

Commit Message

Andrew Stubbs June 29, 2020, 11:05 a.m.
This patch configures the DWARF debug output to match the proposed DWARF 
specification from AMD.  This is already implemented in LLVM and rocgdb 
(out of tree).

This makes no attempt to support CFI, yet, and has some issues with 
vector registers. GCC will need to support some DWARF extensions to make 
those work properly (they're part of the AMD proposal).

Andrew

Comments

Andrew Stubbs July 16, 2020, 3:41 p.m. | #1
On 29/06/2020 12:05, Andrew Stubbs wrote:
> This patch configures the DWARF debug output to match the proposed DWARF 

> specification from AMD.  This is already implemented in LLVM and rocgdb 

> (out of tree).

> 

> This makes no attempt to support CFI, yet, and has some issues with 

> vector registers. GCC will need to support some DWARF extensions to make 

> those work properly (they're part of the AMD proposal).


This patch is now backported to devel/omp/gcc-10.

Andrew

Patch

amdgcn: Support basic DWARF

This is enough DWARF support for "-O0 -g" to work OK, within a single frame,
using the new rocgdb debugger from AMD.  Debugging with optimization enabled
also works when the values are located in SGPRs or memory.

Scalars in VGPRs are problematic, EXEC_HI and VCC_HI are unmappable, and
CFI remains unimplemented.

gcc/ChangeLog:

	* config/gcn/gcn-hsa.h (DBX_REGISTER_NUMBER): New macro.
	* config/gcn/gcn-protos.h (gcn_dwarf_register_number): New prototype.
	* config/gcn/gcn.c (gcn_expand_prologue): Add RTX_FRAME_RELATED_P
	and REG_FRAME_RELATED_EXPR to stack and frame pointer adjustments.
	(gcn_dwarf_register_number): New function.
	(gcn_dwarf_register_span): New function.
	(TARGET_DWARF_REGISTER_SPAN): New hook macro.

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 4fd1365416f..cb291726e19 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -103,3 +103,4 @@  extern const char *last_arg_spec_function (int argc, const char **argv);
 #define DWARF2_DEBUGGING_INFO      1
 #define DWARF2_ASM_LINE_DEBUG_INFO 1
 #define EH_FRAME_THROUGH_COLLECT2  1
+#define DBX_REGISTER_NUMBER(REGNO) gcn_dwarf_register_number (REGNO)
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index e4dadd37f21..92b1a602610 100644
--- a/gcc/config/gcn/gcn-protos.h
+++ b/gcc/config/gcn/gcn-protos.h
@@ -23,6 +23,7 @@  extern bool gcn_can_split_p (machine_mode, rtx);
 extern bool gcn_constant64_p (rtx);
 extern bool gcn_constant_p (rtx);
 extern rtx gcn_convert_mask_mode (rtx reg);
+extern unsigned int gcn_dwarf_register_number (unsigned int regno);
 extern char * gcn_expand_dpp_shr_insn (machine_mode, const char *, int, int);
 extern void gcn_expand_epilogue ();
 extern rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets,
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 5693b75b672..babecef7888 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2795,7 +2795,11 @@  gcn_expand_prologue ()
   if (!cfun || !cfun->machine || cfun->machine->normal_function)
     {
       rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+      rtx sp_hi = gcn_operand_part (Pmode, sp, 1);
+      rtx sp_lo = gcn_operand_part (Pmode, sp, 0);
       rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
+      rtx fp_hi = gcn_operand_part (Pmode, fp, 1);
+      rtx fp_lo = gcn_operand_part (Pmode, fp, 0);
 
       start_sequence ();
 
@@ -2812,14 +2816,40 @@  gcn_expand_prologue ()
 	+ offsets->callee_saves
 	+ offsets->local_vars + offsets->outgoing_args_size;
       if (sp_adjust > 0)
-	emit_insn (gen_adddi3_scc (sp, sp, gen_int_mode (sp_adjust, DImode)));
+	{
+	  /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
+	     we use split add explictly, and specify the DImode add in
+	     the note.  */
+	  rtx scc = gen_rtx_REG (BImode, SCC_REG);
+	  rtx adjustment = gen_int_mode (sp_adjust, SImode);
+	  rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
+							 adjustment, scc));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			gen_rtx_SET (sp,
+				     gen_rtx_PLUS (DImode, sp, adjustment)));
+	  emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
+	}
 
       if (offsets->need_frame_pointer)
-	emit_insn (gen_adddi3_scc (fp, sp,
-				   gen_int_mode
-				   (-(offsets->local_vars +
-				      offsets->outgoing_args_size),
-				    DImode)));
+	{
+	  /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
+	     we use split add explictly, and specify the DImode add in
+	     the note.  */
+	  rtx scc = gen_rtx_REG (BImode, SCC_REG);
+	  int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size);
+	  rtx adjustment = gen_int_mode (fp_adjust, SImode);
+	  rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
+							adjustment, scc));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			gen_rtx_SET (fp,
+				     gen_rtx_PLUS (DImode, sp, adjustment)));
+	  emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
+					 (fp_adjust < 0 ? GEN_INT (-1)
+					  : const0_rtx),
+					 scc, scc));
+	}
 
       rtx_insn *seq = get_insns ();
       end_sequence ();
@@ -2865,6 +2895,8 @@  gcn_expand_prologue ()
 
       /* Set up frame pointer and stack pointer.  */
       rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
+      rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4);
+      rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0);
       rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
       rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
       rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
@@ -2880,10 +2912,28 @@  gcn_expand_prologue ()
       emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
       emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
 
+      /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so we use
+	 split add explictly, and specify the DImode add in the note.
+         The DWARF info expects that the callee-save data is in the frame,
+         even though it isn't (because this is the entry point), so we
+         make a notional adjustment to the DWARF frame offset here.  */
+      rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves,
+					 DImode);
+      rtx insn;
       if (sp_adjust > 0)
-	emit_insn (gen_adddi3_scc (sp, fp, gen_int_mode (sp_adjust, DImode)));
+	{
+	  rtx scc = gen_rtx_REG (BImode, SCC_REG);
+	  rtx adjustment = gen_int_mode (sp_adjust, DImode);
+	  insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment,
+						    scc));
+	  emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc));
+	}
       else
-	emit_move_insn (sp, fp);
+	insn = emit_move_insn (sp, fp);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+		    gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
+						   dbg_adjustment)));
 
       /* Make sure the flat scratch reg doesn't get optimised away.  */
       emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
@@ -6108,6 +6158,64 @@  print_operand (FILE *file, rtx x, int code)
   gcc_unreachable ();
 }
 
+/* Implement DBX_REGISTER_NUMBER macro.
+ 
+   Return the DWARF register number that corresponds to the GCC internal
+   REGNO.  */
+
+unsigned int
+gcn_dwarf_register_number (unsigned int regno)
+{
+  /* Registers defined in DWARF.  */
+  if (regno == EXEC_LO_REG)
+    return 17;
+  /* We need to use a more complex DWARF expression for this
+  else if (regno == EXEC_HI_REG)
+    return 17; */
+  else if (regno == VCC_LO_REG)
+    return 768;
+  /* We need to use a more complex DWARF expression for this
+  else if (regno == VCC_HI_REG)
+    return 768;  */
+  else if (regno == SCC_REG)
+    return 128;
+  else if (SGPR_REGNO_P (regno))
+    {
+      if (regno - FIRST_SGPR_REG < 64)
+	return (regno - FIRST_SGPR_REG + 32);
+      else
+	return (regno - FIRST_SGPR_REG + 1024);
+    }
+  else if (VGPR_REGNO_P (regno))
+    return (regno - FIRST_VGPR_REG + 2560);
+
+  /* Otherwise, there's nothing sensible to do.  */
+  return regno + 100000;
+}
+
+/* Implement TARGET_DWARF_REGISTER_SPAN.
+ 
+   DImode and Vector DImode require additional registers.  */
+
+static rtx
+gcn_dwarf_register_span (rtx rtl)
+{
+  machine_mode mode = GET_MODE (rtl);
+
+  if (VECTOR_MODE_P (mode))
+    mode = GET_MODE_INNER (mode);
+
+  if (GET_MODE_SIZE (mode) != 8)
+    return NULL_RTX;
+
+  rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+  unsigned regno = REGNO (rtl);
+  XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
+  XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
+
+  return p;
+}
+
 /* }}}  */
 /* {{{ TARGET hook overrides.  */
 
@@ -6156,6 +6264,8 @@  print_operand (FILE *file, rtx x, int code)
 #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
 #undef  TARGET_DEBUG_UNWIND_INFO
 #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
+#undef  TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
 #undef  TARGET_EMUTLS_VAR_INIT
 #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
 #undef  TARGET_EXPAND_BUILTIN