[v2,2/8] Handle copy relocations

Message ID 20190801170412.5553-3-tromey@adacore.com
State Superseded
Headers show
Series
  • Handle copy relocations and add $_ada_exception
Related show

Commit Message

Tom Tromey Aug. 1, 2019, 5:04 p.m.
In ELF, if a data symbol is defined in a shared library and used by
the main program, it will be subject to a "copy relocation".  In this
scenario, the main program has a copy of the symbol in question, and a
relocation that tells ld.so to copy the data from the shared library.
Then the symbol in the main program is used to satisfy all references.

This patch changes gdb to handle this scenario.  Data symbols coming
from ELF shared libraries get a special flag that indicates that the
symbol's address may be subject to copy relocation.

I looked briefly into handling copy relocations by looking at the
actual relocations in the main program, but this seemed difficult to
do with BFD.

Note that no caching is done here.  Perhaps this could be changed if
need be; I wanted to avoid possible problems with either objfile
lifetimes and changes, or conflicts with the long-term (vapor-ware)
objfile splitting project.

gdb/ChangeLog
2019-08-01  Tom Tromey  <tromey@adacore.com>

	* symmisc.c (dump_msymbols): Don't use MSYMBOL_VALUE_ADDRESS.
	* ada-lang.c (lesseq_defined_than): Handle
	LOC_STATIC.
	* dwarf2read.c (dwarf2_per_objfile): Add can_copy
	parameter.
	(dwarf2_has_info): Likewise.
	(new_symbol): Set maybe_copied on symbol when
	appropriate.
	* dwarf2read.h (dwarf2_per_objfile): Add can_copy
	parameter.
	<can_copy>: New member.
	* elfread.c (record_minimal_symbol): Set maybe_copied
	on symbol when appropriate.
	(elf_symfile_read): Update call to dwarf2_has_info.
	* minsyms.c (lookup_minimal_symbol_linkage): New
	function.
	* minsyms.h (lookup_minimal_symbol_linkage): Declare.
	* symtab.c (get_symbol_address, get_msymbol_address):
	New functions.
	* symtab.h (get_symbol_address, get_msymbol_address):
	Declare.
	(SYMBOL_VALUE_ADDRESS, MSYMBOL_VALUE_ADDRESS): Handle
	maybe_copied.
	(struct symbol, struct minimal_symbol) <maybe_copied>:
	New member.
---
 gdb/ChangeLog    | 28 ++++++++++++++++++++++++++++
 gdb/ada-lang.c   |  9 +++++++++
 gdb/dwarf2read.c | 44 ++++++++++++++++++++++++++------------------
 gdb/dwarf2read.h | 10 ++++++++--
 gdb/elfread.c    | 16 +++++++++++-----
 gdb/minsyms.c    | 20 ++++++++++++++++++++
 gdb/minsyms.h    |  7 +++++++
 gdb/symfile.h    |  3 ++-
 gdb/symmisc.c    | 10 +++++++---
 gdb/symtab.c     | 44 ++++++++++++++++++++++++++++++++++++++++++++
 gdb/symtab.h     | 42 +++++++++++++++++++++++++++++++++++++++---
 11 files changed, 201 insertions(+), 32 deletions(-)

-- 
2.20.1

Comments

Andrew Burgess Aug. 21, 2019, 3:35 p.m. | #1
* Tom Tromey <tromey@adacore.com> [2019-08-01 11:04:06 -0600]:

> In ELF, if a data symbol is defined in a shared library and used by

> the main program, it will be subject to a "copy relocation".  In this

> scenario, the main program has a copy of the symbol in question, and a

> relocation that tells ld.so to copy the data from the shared library.

> Then the symbol in the main program is used to satisfy all references.

> 

> This patch changes gdb to handle this scenario.  Data symbols coming

> from ELF shared libraries get a special flag that indicates that the

> symbol's address may be subject to copy relocation.

> 

> I looked briefly into handling copy relocations by looking at the

> actual relocations in the main program, but this seemed difficult to

> do with BFD.

> 

> Note that no caching is done here.  Perhaps this could be changed if

> need be; I wanted to avoid possible problems with either objfile

> lifetimes and changes, or conflicts with the long-term (vapor-ware)

> objfile splitting project.

> 

> gdb/ChangeLog

> 2019-08-01  Tom Tromey  <tromey@adacore.com>

> 

> 	* symmisc.c (dump_msymbols): Don't use MSYMBOL_VALUE_ADDRESS.

> 	* ada-lang.c (lesseq_defined_than): Handle

> 	LOC_STATIC.

> 	* dwarf2read.c (dwarf2_per_objfile): Add can_copy

> 	parameter.

> 	(dwarf2_has_info): Likewise.

> 	(new_symbol): Set maybe_copied on symbol when

> 	appropriate.

> 	* dwarf2read.h (dwarf2_per_objfile): Add can_copy

> 	parameter.

> 	<can_copy>: New member.

> 	* elfread.c (record_minimal_symbol): Set maybe_copied

> 	on symbol when appropriate.

> 	(elf_symfile_read): Update call to dwarf2_has_info.

> 	* minsyms.c (lookup_minimal_symbol_linkage): New

> 	function.

> 	* minsyms.h (lookup_minimal_symbol_linkage): Declare.

> 	* symtab.c (get_symbol_address, get_msymbol_address):

> 	New functions.

> 	* symtab.h (get_symbol_address, get_msymbol_address):

> 	Declare.

> 	(SYMBOL_VALUE_ADDRESS, MSYMBOL_VALUE_ADDRESS): Handle

> 	maybe_copied.

> 	(struct symbol, struct minimal_symbol) <maybe_copied>:

> 	New member.

> ---

>  gdb/ChangeLog    | 28 ++++++++++++++++++++++++++++

>  gdb/ada-lang.c   |  9 +++++++++

>  gdb/dwarf2read.c | 44 ++++++++++++++++++++++++++------------------

>  gdb/dwarf2read.h | 10 ++++++++--

>  gdb/elfread.c    | 16 +++++++++++-----

>  gdb/minsyms.c    | 20 ++++++++++++++++++++

>  gdb/minsyms.h    |  7 +++++++

>  gdb/symfile.h    |  3 ++-

>  gdb/symmisc.c    | 10 +++++++---

>  gdb/symtab.c     | 44 ++++++++++++++++++++++++++++++++++++++++++++

>  gdb/symtab.h     | 42 +++++++++++++++++++++++++++++++++++++++---

>  11 files changed, 201 insertions(+), 32 deletions(-)

> 

> diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c

> index 7a5cc4272c6..4e1ee31887a 100644

> --- a/gdb/ada-lang.c

> +++ b/gdb/ada-lang.c

> @@ -4734,6 +4734,15 @@ lesseq_defined_than (struct symbol *sym0, struct symbol *sym1)

>      case LOC_CONST:

>        return SYMBOL_VALUE (sym0) == SYMBOL_VALUE (sym1)

>          && equiv_types (SYMBOL_TYPE (sym0), SYMBOL_TYPE (sym1));

> +

> +    case LOC_STATIC:

> +      {

> +        const char *name0 = SYMBOL_LINKAGE_NAME (sym0);

> +        const char *name1 = SYMBOL_LINKAGE_NAME (sym1);

> +        return (strcmp (name0, name1) == 0

> +                && SYMBOL_VALUE_ADDRESS (sym0) == SYMBOL_VALUE_ADDRESS (sym1));

> +      }

> +

>      default:

>        return 0;

>      }

> diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c

> index d3b9d64f342..90dfde0f1e9 100644

> --- a/gdb/dwarf2read.c

> +++ b/gdb/dwarf2read.c

> @@ -2130,8 +2130,10 @@ attr_value_as_address (struct attribute *attr)

>  /* See declaration.  */

>  

>  dwarf2_per_objfile::dwarf2_per_objfile (struct objfile *objfile_,

> -					const dwarf2_debug_sections *names)

> -  : objfile (objfile_)

> +					const dwarf2_debug_sections *names,

> +					bool can_copy_)

> +  : objfile (objfile_),

> +    can_copy (can_copy_)

>  {

>    if (names == NULL)

>      names = &dwarf2_elf_names;

> @@ -2206,11 +2208,14 @@ private:

>  /* Try to locate the sections we need for DWARF 2 debugging

>     information and return true if we have enough to do something.

>     NAMES points to the dwarf2 section names, or is NULL if the standard

> -   ELF names are used.  */

> +   ELF names are used.  CAN_COPY is true for formats where symbol

> +   interposition is possible and so symbol values must follow copy

> +   relocation rules.  */

>  

>  int

>  dwarf2_has_info (struct objfile *objfile,

> -                 const struct dwarf2_debug_sections *names)

> +                 const struct dwarf2_debug_sections *names,

> +		 bool can_copy)

>  {

>    if (objfile->flags & OBJF_READNEVER)

>      return 0;

> @@ -2220,7 +2225,8 @@ dwarf2_has_info (struct objfile *objfile,

>  

>    if (dwarf2_per_objfile == NULL)

>      dwarf2_per_objfile = dwarf2_objfile_data_key.emplace (objfile, objfile,

> -							  names);

> +							  names,

> +							  can_copy);

>  

>    return (!dwarf2_per_objfile->info.is_virtual

>  	  && dwarf2_per_objfile->info.s.section != NULL

> @@ -21646,19 +21652,21 @@ new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,

>  		}

>  	      else if (attr2 && (DW_UNSND (attr2) != 0))

>  		{

> -		  /* Workaround gfortran PR debug/40040 - it uses

> -		     DW_AT_location for variables in -fPIC libraries which may

> -		     get overriden by other libraries/executable and get

> -		     a different address.  Resolve it by the minimal symbol

> -		     which may come from inferior's executable using copy

> -		     relocation.  Make this workaround only for gfortran as for

> -		     other compilers GDB cannot guess the minimal symbol

> -		     Fortran mangling kind.  */

> -		  if (cu->language == language_fortran && die->parent

> -		      && die->parent->tag == DW_TAG_module

> -		      && cu->producer

> -		      && startswith (cu->producer, "GNU Fortran"))

> -		    SYMBOL_ACLASS_INDEX (sym) = LOC_UNRESOLVED;

> +		  if (SYMBOL_CLASS (sym) == LOC_STATIC

> +		      && (objfile->flags & OBJF_MAINLINE) == 0

> +		      && dwarf2_per_objfile->can_copy)

> +		    {

> +		      /* A global static variable might be subject to

> +			 copy relocation.  We first check for a local

> +			 minsym, though, because maybe the symbol was

> +			 marked hidden, in which case this would not

> +			 apply.  */

> +		      minimal_symbol *found

> +			= (lookup_minimal_symbol_linkage

> +			   (SYMBOL_LINKAGE_NAME (sym), objfile));

> +		      if (found != nullptr)

> +			sym->maybe_copied = 1;

> +		    }

>  

>  		  /* A variable with DW_AT_external is never static,

>  		     but it may be block-scoped.  */

> diff --git a/gdb/dwarf2read.h b/gdb/dwarf2read.h

> index 8939f97af53..94cfc652e3e 100644

> --- a/gdb/dwarf2read.h

> +++ b/gdb/dwarf2read.h

> @@ -104,9 +104,12 @@ struct dwarf2_per_objfile

>  {

>    /* Construct a dwarf2_per_objfile for OBJFILE.  NAMES points to the

>       dwarf2 section names, or is NULL if the standard ELF names are

> -     used.  */

> +     used.  CAN_COPY is true for formats where symbol

> +     interposition is possible and so symbol values must follow copy

> +     relocation rules.  */

>    dwarf2_per_objfile (struct objfile *objfile,

> -		      const dwarf2_debug_sections *names);

> +		      const dwarf2_debug_sections *names,

> +		      bool can_copy);

>  

>    ~dwarf2_per_objfile ();

>  

> @@ -206,6 +209,9 @@ public:

>       original data was compressed using 'dwz -m'.  */

>    std::unique_ptr<struct dwz_file> dwz_file;

>  

> +  /* Whether copy relocations are supported by this object format.  */

> +  bool can_copy;

> +

>    /* A flag indicating whether this objfile has a section loaded at a

>       VMA of 0.  */

>    bool has_section_at_zero = false;

> diff --git a/gdb/elfread.c b/gdb/elfread.c

> index 630550b80dc..b2fade4124b 100644

> --- a/gdb/elfread.c

> +++ b/gdb/elfread.c

> @@ -203,10 +203,16 @@ record_minimal_symbol (minimal_symbol_reader &reader,

>        || ms_type == mst_text_gnu_ifunc)

>      address = gdbarch_addr_bits_remove (gdbarch, address);

>  

> -  return reader.record_full (name, name_len, copy_name, address,

> -			     ms_type,

> -			     gdb_bfd_section_index (objfile->obfd,

> -						    bfd_section));

> +  struct minimal_symbol *result

> +    = reader.record_full (name, name_len, copy_name, address,

> +			  ms_type,

> +			  gdb_bfd_section_index (objfile->obfd,

> +						 bfd_section));

> +  if ((objfile->flags & OBJF_MAINLINE) == 0

> +      && (ms_type == mst_data || ms_type == mst_bss))

> +    result->maybe_copied = 1;

> +

> +  return result;

>  }

>  

>  /* Read the symbol table of an ELF file.

> @@ -1239,7 +1245,7 @@ elf_symfile_read (struct objfile *objfile, symfile_add_flags symfile_flags)

>  				bfd_section_size (abfd, str_sect));

>      }

>  

> -  if (dwarf2_has_info (objfile, NULL))

> +  if (dwarf2_has_info (objfile, NULL, true))

>      {

>        dw_index_kind index_kind;

>  

> diff --git a/gdb/minsyms.c b/gdb/minsyms.c

> index 0f734ebc761..6f4afa979f9 100644

> --- a/gdb/minsyms.c

> +++ b/gdb/minsyms.c

> @@ -519,6 +519,26 @@ iterate_over_minimal_symbols

>  

>  /* See minsyms.h.  */

>  

> +struct minimal_symbol *

> +lookup_minimal_symbol_linkage (const char *name, struct objfile *objf)

> +{

> +  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;

> +

> +  for (minimal_symbol *msymbol = objf->per_bfd->msymbol_hash[hash];

> +       msymbol != NULL;

> +       msymbol = msymbol->hash_next)

> +    {

> +      if (strcmp (MSYMBOL_LINKAGE_NAME (msymbol), name) == 0

> +	  && (MSYMBOL_TYPE (msymbol) == mst_data

> +	      || MSYMBOL_TYPE (msymbol) == mst_bss))

> +	return msymbol;

> +    }

> +

> +  return nullptr;

> +}


I found the name chosen for this function rather confusing.  When
comparing to say 'lookup_minimal_symbol_text' which does
sort-of-almost the same thing but for text type symbols, I might have
been tempted to go with 'lookup_minimal_symbol_data'.  Am I missing
something behind the choice of function name?

Thanks,
Andrew


> +

> +/* See minsyms.h.  */

> +

>  struct bound_minimal_symbol

>  lookup_minimal_symbol_text (const char *name, struct objfile *objf)

>  {

> diff --git a/gdb/minsyms.h b/gdb/minsyms.h

> index bb43165620d..fae6cd25496 100644

> --- a/gdb/minsyms.h

> +++ b/gdb/minsyms.h

> @@ -193,6 +193,13 @@ struct bound_minimal_symbol lookup_minimal_symbol (const char *,

>  

>  struct bound_minimal_symbol lookup_bound_minimal_symbol (const char *);

>  

> +/* Look through the minimal symbols in OBJF for a global (not

> +   file-local) minsym whose linkage name is NAME.  Returns a minimal

> +   symbol that matches, or nullptr otherwise.  */

> +

> +struct minimal_symbol *lookup_minimal_symbol_linkage

> +  (const char *name, struct objfile *objf);

> +

>  /* Look through all the current minimal symbol tables and find the

>     first minimal symbol that matches NAME and has text type.  If OBJF

>     is non-NULL, limit the search to that objfile.  Returns a bound

> diff --git a/gdb/symfile.h b/gdb/symfile.h

> index 741b085e0c4..170308f10d5 100644

> --- a/gdb/symfile.h

> +++ b/gdb/symfile.h

> @@ -585,7 +585,8 @@ struct dwarf2_debug_sections {

>  };

>  

>  extern int dwarf2_has_info (struct objfile *,

> -                            const struct dwarf2_debug_sections *);

> +                            const struct dwarf2_debug_sections *,

> +			    bool = false);

>  

>  /* Dwarf2 sections that can be accessed by dwarf2_get_section_info.  */

>  enum dwarf2_section_enum {

> diff --git a/gdb/symmisc.c b/gdb/symmisc.c

> index 7666de390cd..db93d716704 100644

> --- a/gdb/symmisc.c

> +++ b/gdb/symmisc.c

> @@ -236,9 +236,13 @@ dump_msymbols (struct objfile *objfile, struct ui_file *outfile)

>  	  break;

>  	}

>        fprintf_filtered (outfile, "[%2d] %c ", index, ms_type);

> -      fputs_filtered (paddress (gdbarch, MSYMBOL_VALUE_ADDRESS (objfile,

> -								msymbol)),

> -		      outfile);

> +

> +      /* Use the relocated address as shown in the symbol here -- do

> +	 not try to respect copy relocations.  */

> +      CORE_ADDR addr = (msymbol->value.address

> +			+ ANOFFSET (objfile->section_offsets,

> +				    msymbol->section));

> +      fputs_filtered (paddress (gdbarch, addr), outfile);

>        fprintf_filtered (outfile, " %s", MSYMBOL_LINKAGE_NAME (msymbol));

>        if (section)

>  	{

> diff --git a/gdb/symtab.c b/gdb/symtab.c

> index 87a0c8e4da6..0ff212e0d97 100644

> --- a/gdb/symtab.c

> +++ b/gdb/symtab.c

> @@ -6068,6 +6068,50 @@ symbol_set_symtab (struct symbol *symbol, struct symtab *symtab)

>    symbol->owner.symtab = symtab;

>  }

>  

> +/* See symtab.h.  */

> +

> +CORE_ADDR

> +get_symbol_address (const struct symbol *sym)

> +{

> +  gdb_assert (sym->maybe_copied);

> +  gdb_assert (SYMBOL_CLASS (sym) == LOC_STATIC);

> +

> +  const char *linkage_name = SYMBOL_LINKAGE_NAME (sym);

> +

> +  for (objfile *objfile : current_program_space->objfiles ())

> +    {

> +      minimal_symbol *minsym

> +	= lookup_minimal_symbol_linkage (linkage_name, objfile);

> +      if (minsym != nullptr)

> +	return MSYMBOL_VALUE_ADDRESS (objfile, minsym);

> +    }

> +  return sym->ginfo.value.address;

> +}

> +

> +/* See symtab.h.  */

> +

> +CORE_ADDR

> +get_msymbol_address (struct objfile *objf, const struct minimal_symbol *minsym)

> +{

> +  gdb_assert (minsym->maybe_copied);

> +  gdb_assert ((objf->flags & OBJF_MAINLINE) == 0);

> +

> +  const char *linkage_name = MSYMBOL_LINKAGE_NAME (minsym);

> +

> +  for (objfile *objfile : current_program_space->objfiles ())

> +    {

> +      if ((objfile->flags & OBJF_MAINLINE) != 0)

> +	{

> +	  minimal_symbol *found

> +	    = lookup_minimal_symbol_linkage (linkage_name, objfile);

> +	  if (found != nullptr)

> +	    return MSYMBOL_VALUE_ADDRESS (objfile, found);

> +	}

> +    }

> +  return (minsym->value.address

> +	  + ANOFFSET (objf->section_offsets, minsym->section));

> +}

> +

>  

>  

>  void

> diff --git a/gdb/symtab.h b/gdb/symtab.h

> index 9f1791ba63c..dd5a1299b9b 100644

> --- a/gdb/symtab.h

> +++ b/gdb/symtab.h

> @@ -454,6 +454,14 @@ extern const char *symbol_get_demangled_name

>  

>  extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);

>  

> +/* Return the address of SYM.  The MAYBE_COPIED flag must be set on

> +   SYM.  If SYM appears in the main program's minimal symbols, then

> +   that minsym's address is returned; otherwise, SYM's address is

> +   returned.  This should generally only be used via the

> +   SYMBOL_VALUE_ADDRESS macro.  */

> +

> +extern CORE_ADDR get_symbol_address (const struct symbol *sym);

> +

>  /* Note that all the following SYMBOL_* macros are used with the

>     SYMBOL argument being either a partial symbol or

>     a full symbol.  Both types have a ginfo field.  In particular

> @@ -463,7 +471,9 @@ extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);

>     field only, instead of the SYMBOL parameter.  */

>  

>  #define SYMBOL_VALUE(symbol)		(symbol)->ginfo.value.ivalue

> -#define SYMBOL_VALUE_ADDRESS(symbol)	((symbol)->ginfo.value.address + 0)

> +#define SYMBOL_VALUE_ADDRESS(symbol)			      \

> +  (((symbol)->maybe_copied) ? get_symbol_address (symbol)     \

> +   : ((symbol)->ginfo.value.address))

>  #define SET_SYMBOL_VALUE_ADDRESS(symbol, new_value)	\

>    ((symbol)->ginfo.value.address = (new_value))

>  #define SYMBOL_VALUE_BYTES(symbol)	(symbol)->ginfo.value.bytes

> @@ -671,6 +681,14 @@ struct minimal_symbol : public general_symbol_info

>       the object file format may not carry that piece of information.  */

>    unsigned int has_size : 1;

>  

> +  /* For data symbols only, if this is set, then the symbol might be

> +     subject to copy relocation.  In this case, a minimal symbol

> +     matching the symbol's linkage name is first looked for in the

> +     main objfile.  If found, then that address is used; otherwise the

> +     address in this symbol is used.  */

> +

> +  unsigned maybe_copied : 1;

> +

>    /* Minimal symbols with the same hash key are kept on a linked

>       list.  This is the link.  */

>  

> @@ -690,6 +708,15 @@ struct minimal_symbol : public general_symbol_info

>    bool text_p () const;

>  };

>  

> +/* Return the address of MINSYM, which comes from OBJF.  The

> +   MAYBE_COPIED flag must be set on MINSYM.  If MINSYM appears in the

> +   main program's minimal symbols, then that minsym's address is

> +   returned; otherwise, MINSYM's address is returned.  This should

> +   generally only be used via the MSYMBOL_VALUE_ADDRESS macro.  */

> +

> +extern CORE_ADDR get_msymbol_address (struct objfile *objf,

> +				      const struct minimal_symbol *minsym);

> +

>  #define MSYMBOL_TARGET_FLAG_1(msymbol)  (msymbol)->target_flag_1

>  #define MSYMBOL_TARGET_FLAG_2(msymbol)  (msymbol)->target_flag_2

>  #define MSYMBOL_SIZE(msymbol)		((msymbol)->size + 0)

> @@ -708,8 +735,9 @@ struct minimal_symbol : public general_symbol_info

>  /* The relocated address of the minimal symbol, using the section

>     offsets from OBJFILE.  */

>  #define MSYMBOL_VALUE_ADDRESS(objfile, symbol)				\

> -  ((symbol)->value.address					\

> -   + ANOFFSET ((objfile)->section_offsets, ((symbol)->section)))

> +  (((symbol)->maybe_copied) ? get_msymbol_address (objfile, symbol)	\

> +   : ((symbol)->value.address						\

> +      + ANOFFSET ((objfile)->section_offsets, ((symbol)->section))))

>  /* For a bound minsym, we can easily compute the address directly.  */

>  #define BMSYMBOL_VALUE_ADDRESS(symbol) \

>    MSYMBOL_VALUE_ADDRESS ((symbol).objfile, (symbol).minsym)

> @@ -1112,6 +1140,14 @@ struct symbol

>    /* Whether this is an inlined function (class LOC_BLOCK only).  */

>    unsigned is_inlined : 1;

>  

> +  /* For LOC_STATIC only, if this is set, then the symbol might be

> +     subject to copy relocation.  In this case, a minimal symbol

> +     matching the symbol's linkage name is first looked for in the

> +     main objfile.  If found, then that address is used; otherwise the

> +     address in this symbol is used.  */

> +

> +  unsigned maybe_copied : 1;

> +

>    /* The concrete type of this symbol.  */

>  

>    ENUM_BITFIELD (symbol_subclass_kind) subclass : 2;

> -- 

> 2.20.1

>
Tom Tromey Aug. 21, 2019, 7:11 p.m. | #2
>>>>> "Andrew" == Andrew Burgess <andrew.burgess@embecosm.com> writes:


>> +struct minimal_symbol *

>> +lookup_minimal_symbol_linkage (const char *name, struct objfile *objf)


Andrew> I found the name chosen for this function rather confusing.  When
Andrew> comparing to say 'lookup_minimal_symbol_text' which does
Andrew> sort-of-almost the same thing but for text type symbols, I might have
Andrew> been tempted to go with 'lookup_minimal_symbol_data'.  Am I missing
Andrew> something behind the choice of function name?

You aren't missing anything.

lookup_minimal_symbol_data seems maybe confusing too, because
lookup_minimal_symbol_text loops over all objfiles, but this one does not.

I guess I don't care all that much (as evidenced by the original choice
of name ;) so if you still like that better, I'll make the change.

Tom
Andrew Burgess Aug. 22, 2019, 8:41 a.m. | #3
* Tom Tromey <tromey@adacore.com> [2019-08-21 13:11:54 -0600]:

> >>>>> "Andrew" == Andrew Burgess <andrew.burgess@embecosm.com> writes:

> 

> >> +struct minimal_symbol *

> >> +lookup_minimal_symbol_linkage (const char *name, struct objfile *objf)

> 

> Andrew> I found the name chosen for this function rather confusing.  When

> Andrew> comparing to say 'lookup_minimal_symbol_text' which does

> Andrew> sort-of-almost the same thing but for text type symbols, I might have

> Andrew> been tempted to go with 'lookup_minimal_symbol_data'.  Am I missing

> Andrew> something behind the choice of function name?

> 

> You aren't missing anything.

> 

> lookup_minimal_symbol_data seems maybe confusing too, because

> lookup_minimal_symbol_text loops over all objfiles, but this one

> does not.


Indeed, and the return type is different.  I also notice that
lookup_minimal_symbol_text can be restricted to a single objfile,
however in that case we _still_ iterate over all object files.

So then I thought, surely we can do better, maybe add a single object
iterator and have lookup_minimal_symbol_text not walk through all
object files.... but then I spotted this condition:

    if (objf == NULL || objf == objfile
        || objf == objfile->separate_debug_objfile_backlink)
      {
          /* Process object file.  */
      }

So, in this case we have to scan all objfiles in case we have split
debug information.  Wouldn't this apply to your new function too?

I guess I'm thinking that we could merge the core of
lookup_minimal_symbol_text and lookup_minimal_symbol_linkage (or
_data), and just pass in a comparison function to check for either
text or data symbols.

> 

> I guess I don't care all that much (as evidenced by the original choice

> of name ;) so if you still like that better, I'll make the change.


Meh! I guess my only request would be can the comment at least explain
that it's similar to *_text but for data as currently neither the
function name nor the comment mention data at all.

Thanks,
Andrew
Tom Tromey Sept. 19, 2019, 5:44 p.m. | #4
>>>>> "Andrew" == Andrew Burgess <andrew.burgess@embecosm.com> writes:


Andrew> So then I thought, surely we can do better, maybe add a single object
Andrew> iterator and have lookup_minimal_symbol_text not walk through all
Andrew> object files.... but then I spotted this condition:

Andrew>     if (objf == NULL || objf == objfile
Andrew>         || objf == objfile->separate_debug_objfile_backlink)
Andrew>       {
Andrew>           /* Process object file.  */
Andrew>       }

Andrew> So, in this case we have to scan all objfiles in case we have split
Andrew> debug information.  Wouldn't this apply to your new function too?

TBH I was not sure.  Can we really have a minimal symbol that appears in
the separate debug info but not in the main objfile?  I guess maybe it's
possible if one strips the main objfile.

I'll change my patch.

Andrew> I guess I'm thinking that we could merge the core of
Andrew> lookup_minimal_symbol_text and lookup_minimal_symbol_linkage (or
Andrew> _data), and just pass in a comparison function to check for either
Andrew> text or data symbols.

Sounds good, I'll try that.  There is already a separate debug iterator,
so maybe that can be used as well.

>> I guess I don't care all that much (as evidenced by the original choice

>> of name ;) so if you still like that better, I'll make the change.


Andrew> Meh! I guess my only request would be can the comment at least explain
Andrew> that it's similar to *_text but for data as currently neither the
Andrew> function name nor the comment mention data at all.

I'll fix that up too.

Tom
Tom Tromey Sept. 19, 2019, 6:27 p.m. | #5
>>>>> "Tom" == Tom Tromey <tom@tromey.com> writes:


Andrew> I guess I'm thinking that we could merge the core of
Andrew> lookup_minimal_symbol_text and lookup_minimal_symbol_linkage (or
Andrew> _data), and just pass in a comparison function to check for either
Andrew> text or data symbols.

Tom> Sounds good, I'll try that.  There is already a separate debug iterator,
Tom> so maybe that can be used as well.

I looked at this, but there isn't really that much to share.

IMO callbacks tend to obfuscate the code.  So I think I'd rather not do
that, if that's ok with you.

Maybe introducing an iterator over minimal symbols, given a hash, would
simplify things a bit.  But, that's not really much of a savings.

Tom
Andrew Burgess Sept. 19, 2019, 6:40 p.m. | #6
* Tom Tromey <tromey@adacore.com> [2019-09-19 12:27:58 -0600]:

> >>>>> "Tom" == Tom Tromey <tom@tromey.com> writes:

> 

> Andrew> I guess I'm thinking that we could merge the core of

> Andrew> lookup_minimal_symbol_text and lookup_minimal_symbol_linkage (or

> Andrew> _data), and just pass in a comparison function to check for either

> Andrew> text or data symbols.

> 

> Tom> Sounds good, I'll try that.  There is already a separate debug iterator,

> Tom> so maybe that can be used as well.

> 

> I looked at this, but there isn't really that much to share.

> 

> IMO callbacks tend to obfuscate the code.  So I think I'd rather not do

> that, if that's ok with you.


That's fine it was only an idea.

Thanks,
Andrew

Patch

diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index 7a5cc4272c6..4e1ee31887a 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -4734,6 +4734,15 @@  lesseq_defined_than (struct symbol *sym0, struct symbol *sym1)
     case LOC_CONST:
       return SYMBOL_VALUE (sym0) == SYMBOL_VALUE (sym1)
         && equiv_types (SYMBOL_TYPE (sym0), SYMBOL_TYPE (sym1));
+
+    case LOC_STATIC:
+      {
+        const char *name0 = SYMBOL_LINKAGE_NAME (sym0);
+        const char *name1 = SYMBOL_LINKAGE_NAME (sym1);
+        return (strcmp (name0, name1) == 0
+                && SYMBOL_VALUE_ADDRESS (sym0) == SYMBOL_VALUE_ADDRESS (sym1));
+      }
+
     default:
       return 0;
     }
diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index d3b9d64f342..90dfde0f1e9 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -2130,8 +2130,10 @@  attr_value_as_address (struct attribute *attr)
 /* See declaration.  */
 
 dwarf2_per_objfile::dwarf2_per_objfile (struct objfile *objfile_,
-					const dwarf2_debug_sections *names)
-  : objfile (objfile_)
+					const dwarf2_debug_sections *names,
+					bool can_copy_)
+  : objfile (objfile_),
+    can_copy (can_copy_)
 {
   if (names == NULL)
     names = &dwarf2_elf_names;
@@ -2206,11 +2208,14 @@  private:
 /* Try to locate the sections we need for DWARF 2 debugging
    information and return true if we have enough to do something.
    NAMES points to the dwarf2 section names, or is NULL if the standard
-   ELF names are used.  */
+   ELF names are used.  CAN_COPY is true for formats where symbol
+   interposition is possible and so symbol values must follow copy
+   relocation rules.  */
 
 int
 dwarf2_has_info (struct objfile *objfile,
-                 const struct dwarf2_debug_sections *names)
+                 const struct dwarf2_debug_sections *names,
+		 bool can_copy)
 {
   if (objfile->flags & OBJF_READNEVER)
     return 0;
@@ -2220,7 +2225,8 @@  dwarf2_has_info (struct objfile *objfile,
 
   if (dwarf2_per_objfile == NULL)
     dwarf2_per_objfile = dwarf2_objfile_data_key.emplace (objfile, objfile,
-							  names);
+							  names,
+							  can_copy);
 
   return (!dwarf2_per_objfile->info.is_virtual
 	  && dwarf2_per_objfile->info.s.section != NULL
@@ -21646,19 +21652,21 @@  new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
 		}
 	      else if (attr2 && (DW_UNSND (attr2) != 0))
 		{
-		  /* Workaround gfortran PR debug/40040 - it uses
-		     DW_AT_location for variables in -fPIC libraries which may
-		     get overriden by other libraries/executable and get
-		     a different address.  Resolve it by the minimal symbol
-		     which may come from inferior's executable using copy
-		     relocation.  Make this workaround only for gfortran as for
-		     other compilers GDB cannot guess the minimal symbol
-		     Fortran mangling kind.  */
-		  if (cu->language == language_fortran && die->parent
-		      && die->parent->tag == DW_TAG_module
-		      && cu->producer
-		      && startswith (cu->producer, "GNU Fortran"))
-		    SYMBOL_ACLASS_INDEX (sym) = LOC_UNRESOLVED;
+		  if (SYMBOL_CLASS (sym) == LOC_STATIC
+		      && (objfile->flags & OBJF_MAINLINE) == 0
+		      && dwarf2_per_objfile->can_copy)
+		    {
+		      /* A global static variable might be subject to
+			 copy relocation.  We first check for a local
+			 minsym, though, because maybe the symbol was
+			 marked hidden, in which case this would not
+			 apply.  */
+		      minimal_symbol *found
+			= (lookup_minimal_symbol_linkage
+			   (SYMBOL_LINKAGE_NAME (sym), objfile));
+		      if (found != nullptr)
+			sym->maybe_copied = 1;
+		    }
 
 		  /* A variable with DW_AT_external is never static,
 		     but it may be block-scoped.  */
diff --git a/gdb/dwarf2read.h b/gdb/dwarf2read.h
index 8939f97af53..94cfc652e3e 100644
--- a/gdb/dwarf2read.h
+++ b/gdb/dwarf2read.h
@@ -104,9 +104,12 @@  struct dwarf2_per_objfile
 {
   /* Construct a dwarf2_per_objfile for OBJFILE.  NAMES points to the
      dwarf2 section names, or is NULL if the standard ELF names are
-     used.  */
+     used.  CAN_COPY is true for formats where symbol
+     interposition is possible and so symbol values must follow copy
+     relocation rules.  */
   dwarf2_per_objfile (struct objfile *objfile,
-		      const dwarf2_debug_sections *names);
+		      const dwarf2_debug_sections *names,
+		      bool can_copy);
 
   ~dwarf2_per_objfile ();
 
@@ -206,6 +209,9 @@  public:
      original data was compressed using 'dwz -m'.  */
   std::unique_ptr<struct dwz_file> dwz_file;
 
+  /* Whether copy relocations are supported by this object format.  */
+  bool can_copy;
+
   /* A flag indicating whether this objfile has a section loaded at a
      VMA of 0.  */
   bool has_section_at_zero = false;
diff --git a/gdb/elfread.c b/gdb/elfread.c
index 630550b80dc..b2fade4124b 100644
--- a/gdb/elfread.c
+++ b/gdb/elfread.c
@@ -203,10 +203,16 @@  record_minimal_symbol (minimal_symbol_reader &reader,
       || ms_type == mst_text_gnu_ifunc)
     address = gdbarch_addr_bits_remove (gdbarch, address);
 
-  return reader.record_full (name, name_len, copy_name, address,
-			     ms_type,
-			     gdb_bfd_section_index (objfile->obfd,
-						    bfd_section));
+  struct minimal_symbol *result
+    = reader.record_full (name, name_len, copy_name, address,
+			  ms_type,
+			  gdb_bfd_section_index (objfile->obfd,
+						 bfd_section));
+  if ((objfile->flags & OBJF_MAINLINE) == 0
+      && (ms_type == mst_data || ms_type == mst_bss))
+    result->maybe_copied = 1;
+
+  return result;
 }
 
 /* Read the symbol table of an ELF file.
@@ -1239,7 +1245,7 @@  elf_symfile_read (struct objfile *objfile, symfile_add_flags symfile_flags)
 				bfd_section_size (abfd, str_sect));
     }
 
-  if (dwarf2_has_info (objfile, NULL))
+  if (dwarf2_has_info (objfile, NULL, true))
     {
       dw_index_kind index_kind;
 
diff --git a/gdb/minsyms.c b/gdb/minsyms.c
index 0f734ebc761..6f4afa979f9 100644
--- a/gdb/minsyms.c
+++ b/gdb/minsyms.c
@@ -519,6 +519,26 @@  iterate_over_minimal_symbols
 
 /* See minsyms.h.  */
 
+struct minimal_symbol *
+lookup_minimal_symbol_linkage (const char *name, struct objfile *objf)
+{
+  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
+
+  for (minimal_symbol *msymbol = objf->per_bfd->msymbol_hash[hash];
+       msymbol != NULL;
+       msymbol = msymbol->hash_next)
+    {
+      if (strcmp (MSYMBOL_LINKAGE_NAME (msymbol), name) == 0
+	  && (MSYMBOL_TYPE (msymbol) == mst_data
+	      || MSYMBOL_TYPE (msymbol) == mst_bss))
+	return msymbol;
+    }
+
+  return nullptr;
+}
+
+/* See minsyms.h.  */
+
 struct bound_minimal_symbol
 lookup_minimal_symbol_text (const char *name, struct objfile *objf)
 {
diff --git a/gdb/minsyms.h b/gdb/minsyms.h
index bb43165620d..fae6cd25496 100644
--- a/gdb/minsyms.h
+++ b/gdb/minsyms.h
@@ -193,6 +193,13 @@  struct bound_minimal_symbol lookup_minimal_symbol (const char *,
 
 struct bound_minimal_symbol lookup_bound_minimal_symbol (const char *);
 
+/* Look through the minimal symbols in OBJF for a global (not
+   file-local) minsym whose linkage name is NAME.  Returns a minimal
+   symbol that matches, or nullptr otherwise.  */
+
+struct minimal_symbol *lookup_minimal_symbol_linkage
+  (const char *name, struct objfile *objf);
+
 /* Look through all the current minimal symbol tables and find the
    first minimal symbol that matches NAME and has text type.  If OBJF
    is non-NULL, limit the search to that objfile.  Returns a bound
diff --git a/gdb/symfile.h b/gdb/symfile.h
index 741b085e0c4..170308f10d5 100644
--- a/gdb/symfile.h
+++ b/gdb/symfile.h
@@ -585,7 +585,8 @@  struct dwarf2_debug_sections {
 };
 
 extern int dwarf2_has_info (struct objfile *,
-                            const struct dwarf2_debug_sections *);
+                            const struct dwarf2_debug_sections *,
+			    bool = false);
 
 /* Dwarf2 sections that can be accessed by dwarf2_get_section_info.  */
 enum dwarf2_section_enum {
diff --git a/gdb/symmisc.c b/gdb/symmisc.c
index 7666de390cd..db93d716704 100644
--- a/gdb/symmisc.c
+++ b/gdb/symmisc.c
@@ -236,9 +236,13 @@  dump_msymbols (struct objfile *objfile, struct ui_file *outfile)
 	  break;
 	}
       fprintf_filtered (outfile, "[%2d] %c ", index, ms_type);
-      fputs_filtered (paddress (gdbarch, MSYMBOL_VALUE_ADDRESS (objfile,
-								msymbol)),
-		      outfile);
+
+      /* Use the relocated address as shown in the symbol here -- do
+	 not try to respect copy relocations.  */
+      CORE_ADDR addr = (msymbol->value.address
+			+ ANOFFSET (objfile->section_offsets,
+				    msymbol->section));
+      fputs_filtered (paddress (gdbarch, addr), outfile);
       fprintf_filtered (outfile, " %s", MSYMBOL_LINKAGE_NAME (msymbol));
       if (section)
 	{
diff --git a/gdb/symtab.c b/gdb/symtab.c
index 87a0c8e4da6..0ff212e0d97 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -6068,6 +6068,50 @@  symbol_set_symtab (struct symbol *symbol, struct symtab *symtab)
   symbol->owner.symtab = symtab;
 }
 
+/* See symtab.h.  */
+
+CORE_ADDR
+get_symbol_address (const struct symbol *sym)
+{
+  gdb_assert (sym->maybe_copied);
+  gdb_assert (SYMBOL_CLASS (sym) == LOC_STATIC);
+
+  const char *linkage_name = SYMBOL_LINKAGE_NAME (sym);
+
+  for (objfile *objfile : current_program_space->objfiles ())
+    {
+      minimal_symbol *minsym
+	= lookup_minimal_symbol_linkage (linkage_name, objfile);
+      if (minsym != nullptr)
+	return MSYMBOL_VALUE_ADDRESS (objfile, minsym);
+    }
+  return sym->ginfo.value.address;
+}
+
+/* See symtab.h.  */
+
+CORE_ADDR
+get_msymbol_address (struct objfile *objf, const struct minimal_symbol *minsym)
+{
+  gdb_assert (minsym->maybe_copied);
+  gdb_assert ((objf->flags & OBJF_MAINLINE) == 0);
+
+  const char *linkage_name = MSYMBOL_LINKAGE_NAME (minsym);
+
+  for (objfile *objfile : current_program_space->objfiles ())
+    {
+      if ((objfile->flags & OBJF_MAINLINE) != 0)
+	{
+	  minimal_symbol *found
+	    = lookup_minimal_symbol_linkage (linkage_name, objfile);
+	  if (found != nullptr)
+	    return MSYMBOL_VALUE_ADDRESS (objfile, found);
+	}
+    }
+  return (minsym->value.address
+	  + ANOFFSET (objf->section_offsets, minsym->section));
+}
+
 
 
 void
diff --git a/gdb/symtab.h b/gdb/symtab.h
index 9f1791ba63c..dd5a1299b9b 100644
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -454,6 +454,14 @@  extern const char *symbol_get_demangled_name
 
 extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);
 
+/* Return the address of SYM.  The MAYBE_COPIED flag must be set on
+   SYM.  If SYM appears in the main program's minimal symbols, then
+   that minsym's address is returned; otherwise, SYM's address is
+   returned.  This should generally only be used via the
+   SYMBOL_VALUE_ADDRESS macro.  */
+
+extern CORE_ADDR get_symbol_address (const struct symbol *sym);
+
 /* Note that all the following SYMBOL_* macros are used with the
    SYMBOL argument being either a partial symbol or
    a full symbol.  Both types have a ginfo field.  In particular
@@ -463,7 +471,9 @@  extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);
    field only, instead of the SYMBOL parameter.  */
 
 #define SYMBOL_VALUE(symbol)		(symbol)->ginfo.value.ivalue
-#define SYMBOL_VALUE_ADDRESS(symbol)	((symbol)->ginfo.value.address + 0)
+#define SYMBOL_VALUE_ADDRESS(symbol)			      \
+  (((symbol)->maybe_copied) ? get_symbol_address (symbol)     \
+   : ((symbol)->ginfo.value.address))
 #define SET_SYMBOL_VALUE_ADDRESS(symbol, new_value)	\
   ((symbol)->ginfo.value.address = (new_value))
 #define SYMBOL_VALUE_BYTES(symbol)	(symbol)->ginfo.value.bytes
@@ -671,6 +681,14 @@  struct minimal_symbol : public general_symbol_info
      the object file format may not carry that piece of information.  */
   unsigned int has_size : 1;
 
+  /* For data symbols only, if this is set, then the symbol might be
+     subject to copy relocation.  In this case, a minimal symbol
+     matching the symbol's linkage name is first looked for in the
+     main objfile.  If found, then that address is used; otherwise the
+     address in this symbol is used.  */
+
+  unsigned maybe_copied : 1;
+
   /* Minimal symbols with the same hash key are kept on a linked
      list.  This is the link.  */
 
@@ -690,6 +708,15 @@  struct minimal_symbol : public general_symbol_info
   bool text_p () const;
 };
 
+/* Return the address of MINSYM, which comes from OBJF.  The
+   MAYBE_COPIED flag must be set on MINSYM.  If MINSYM appears in the
+   main program's minimal symbols, then that minsym's address is
+   returned; otherwise, MINSYM's address is returned.  This should
+   generally only be used via the MSYMBOL_VALUE_ADDRESS macro.  */
+
+extern CORE_ADDR get_msymbol_address (struct objfile *objf,
+				      const struct minimal_symbol *minsym);
+
 #define MSYMBOL_TARGET_FLAG_1(msymbol)  (msymbol)->target_flag_1
 #define MSYMBOL_TARGET_FLAG_2(msymbol)  (msymbol)->target_flag_2
 #define MSYMBOL_SIZE(msymbol)		((msymbol)->size + 0)
@@ -708,8 +735,9 @@  struct minimal_symbol : public general_symbol_info
 /* The relocated address of the minimal symbol, using the section
    offsets from OBJFILE.  */
 #define MSYMBOL_VALUE_ADDRESS(objfile, symbol)				\
-  ((symbol)->value.address					\
-   + ANOFFSET ((objfile)->section_offsets, ((symbol)->section)))
+  (((symbol)->maybe_copied) ? get_msymbol_address (objfile, symbol)	\
+   : ((symbol)->value.address						\
+      + ANOFFSET ((objfile)->section_offsets, ((symbol)->section))))
 /* For a bound minsym, we can easily compute the address directly.  */
 #define BMSYMBOL_VALUE_ADDRESS(symbol) \
   MSYMBOL_VALUE_ADDRESS ((symbol).objfile, (symbol).minsym)
@@ -1112,6 +1140,14 @@  struct symbol
   /* Whether this is an inlined function (class LOC_BLOCK only).  */
   unsigned is_inlined : 1;
 
+  /* For LOC_STATIC only, if this is set, then the symbol might be
+     subject to copy relocation.  In this case, a minimal symbol
+     matching the symbol's linkage name is first looked for in the
+     main objfile.  If found, then that address is used; otherwise the
+     address in this symbol is used.  */
+
+  unsigned maybe_copied : 1;
+
   /* The concrete type of this symbol.  */
 
   ENUM_BITFIELD (symbol_subclass_kind) subclass : 2;