[04/10] libctf, include: remove the nondeduplicating CTF linker

Message ID 20210227132954.7766-5-nick.alcock@oracle.com
State New
Headers show
Series
  • libctf: cleanups, speedups, and bugfixes (one needing review)
Related show

Commit Message

H.J. Lu via Binutils Feb. 27, 2021, 1:29 p.m.
The nondeduplicating CTF linker was kept around when the deduplicating
one was added so that people had something to fall back to in case the
deduplicating linker turned out to be buggy.  It's now much more stable
than the nondeduplicating linker, in addition to much faster, using much
less memory and producing much better output.  In addition, while
libctf has a linker flag to invoke the nondeduplicating linker, ld does
not expose it: the only way to turn it on within ld is an intentionally-
undocumented environment variable.  So we can remove it without any ABI
or user-visibility concerns (the only thing we leave around is the
CTF_LINK_NONDEDUP flag, which can easily be interpreted as "deduplicate
less", though right now it does nothing).

This lets us remove a lot of complexity associated with tracking
filenames and CU names separately (something the deduplcating linker
never bothered with, since the cunames are always reliable and ld never
hands us useful filenames anyway)

The biggest lacuna left behind is the ctf_type_mapping machinery, which
slows down deduplicating links quite a lot.  We can't just ditch it
because ctf_add_type uses it: removing the slowdown from the
deduplicating linker is a job for another commit.

include/ChangeLog
2021-02-23  Nick Alcock  <nick.alcock@oracle.com>

	* ctf-api.h (CTF_LINK_SHARE_DUPLICATED): Note that this might
	merely change how much deduplication is done.

libctf/ChangeLog
2021-02-23  Nick Alcock  <nick.alcock@oracle.com>

	* ctf-link.c (ctf_create_per_cu): Drop FILENAME now that it is
	always identical to CUNAME.
	(ctf_link_deduplicating_one_symtypetab): Adjust.
	(ctf_link_one_type): Remove.
	(ctf_link_one_input_archive_member): Likewise.
	(ctf_link_close_one_input_archive): Likewise.
	(ctf_link_one_input_archive): Likewise.
	(ctf_link): No longer call it.  Drop CTF_LINK_NONDEDUP path.
	Improve header comment a bit (dicts, not files).  Adjust
	ctf_create_per_cu call.
	(ctf_link_deduplicating_variables): Simplify.
	(ctf_link_in_member_cb_arg_t) <cu_name>: Remove.
	<in_input_cu_file>: Likewise.
	<in_fp_parent>: Likewise.
	<done_parent>: Likewise.
	(ctf_link_one_variable): Turn uses of in_file_name to in_cuname.
---
 include/ChangeLog |   5 +
 include/ctf-api.h |   3 +-
 libctf/ChangeLog  |  19 ++++
 libctf/ctf-link.c | 250 ++++------------------------------------------
 4 files changed, 46 insertions(+), 231 deletions(-)

-- 
2.30.0.252.gc27e85e57d

Patch

diff --git a/include/ChangeLog b/include/ChangeLog
index 616b923b53b..5f081bb3876 100644
--- a/include/ChangeLog
+++ b/include/ChangeLog
@@ -1,3 +1,8 @@ 
+2021-02-23  Nick Alcock  <nick.alcock@oracle.com>
+
+	* ctf-api.h (CTF_LINK_SHARE_DUPLICATED): Note that this might
+	merely change how much deduplication is done.
+
 2021-02-21  Alan Modra  <amodra@gmail.com>
 
 	* bfdlink.h (struct bfd_link_info): Add warn_multiple_definition.
diff --git a/include/ctf-api.h b/include/ctf-api.h
index ce764dff5e3..25dbe9ec533 100644
--- a/include/ctf-api.h
+++ b/include/ctf-api.h
@@ -94,7 +94,8 @@  typedef struct ctf_link_sym
 /* Share only types that are used by multiple inputs.  */
 #define CTF_LINK_SHARE_DUPLICATED 0x1
 
-/* Do a nondeduplicating link.  */
+/* Do a nondeduplicating link, or otherwise deduplicate "less hard", trading off
+   CTF output size for link time.  */
 #define CTF_LINK_NONDEDUP 0x2
 
 /* Create empty outputs for all registered CU mappings even if no types are
diff --git a/libctf/ChangeLog b/libctf/ChangeLog
index 0fc8e3caafd..0f84eea9533 100644
--- a/libctf/ChangeLog
+++ b/libctf/ChangeLog
@@ -1,3 +1,22 @@ 
+2021-02-23  Nick Alcock  <nick.alcock@oracle.com>
+
+	* ctf-link.c (ctf_create_per_cu): Drop FILENAME now that it is
+	always identical to CUNAME.
+	(ctf_link_deduplicating_one_symtypetab): Adjust.
+	(ctf_link_one_type): Remove.
+	(ctf_link_one_input_archive_member): Likewise.
+	(ctf_link_close_one_input_archive): Likewise.
+	(ctf_link_one_input_archive): Likewise.
+	(ctf_link): No longer call it.  Drop CTF_LINK_NONDEDUP path.
+	Improve header comment a bit (dicts, not files).  Adjust
+	ctf_create_per_cu call.
+	(ctf_link_deduplicating_variables): Simplify.
+	(ctf_link_in_member_cb_arg_t) <cu_name>: Remove.
+	<in_input_cu_file>: Likewise.
+	<in_fp_parent>: Likewise.
+	<done_parent>: Likewise.
+	(ctf_link_one_variable): Turn uses of in_file_name to in_cuname.
+
 2021-02-18  Nick Alcock  <nick.alcock@oracle.com>
 
 	* ctf-types.c (ctf_member_iter): Move 'rc' to an inner scope.
diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c
index 7c342166b5c..5d813dbf8b3 100644
--- a/libctf/ctf-link.c
+++ b/libctf/ctf-link.c
@@ -280,29 +280,24 @@  ctf_link_add_ctf (ctf_dict_t *fp, ctf_archive_t *ctf, const char *name)
    interning it if need be.  */
 
 static ctf_dict_t *
-ctf_create_per_cu (ctf_dict_t *fp, const char *filename, const char *cuname)
+ctf_create_per_cu (ctf_dict_t *fp, const char *cu_name)
 {
   ctf_dict_t *cu_fp;
   const char *ctf_name = NULL;
   char *dynname = NULL;
 
   /* First, check the mapping table and translate the per-CU name we use
-     accordingly.  We check both the input filename and the CU name.  Only if
-     neither are set do we fall back to the input filename as the per-CU
-     dictionary name.  We prefer the filename because this is easier for likely
-     callers to determine.  */
+     accordingly.  */
 
   if (fp->ctf_link_in_cu_mapping)
     {
-      if (((ctf_name = ctf_dynhash_lookup (fp->ctf_link_in_cu_mapping,
-					   filename)) == NULL) &&
-	  ((ctf_name = ctf_dynhash_lookup (fp->ctf_link_in_cu_mapping,
-					   cuname)) == NULL))
-	ctf_name = filename;
+      if ((ctf_name = ctf_dynhash_lookup (fp->ctf_link_in_cu_mapping,
+					  cu_name)) == NULL)
+	ctf_name = cu_name;
     }
 
   if (ctf_name == NULL)
-    ctf_name = filename;
+    ctf_name = cu_name;
 
   if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, ctf_name)) == NULL)
     {
@@ -311,8 +306,7 @@  ctf_create_per_cu (ctf_dict_t *fp, const char *filename, const char *cuname)
       if ((cu_fp = ctf_create (&err)) == NULL)
 	{
 	  ctf_err_warn (fp, 0, err, _("cannot create per-CU CTF archive for "
-				      "CU %s from input file %s"),
-			cuname, filename);
+				      "input CU %s"), cu_name);
 	  ctf_set_errno (fp, err);
 	  return NULL;
 	}
@@ -323,7 +317,7 @@  ctf_create_per_cu (ctf_dict_t *fp, const char *filename, const char *cuname)
 	goto oom;
 
       ctf_import_unref (cu_fp, fp);
-      ctf_cuname_set (cu_fp, cuname);
+      ctf_cuname_set (cu_fp, cu_name);
       ctf_parent_name_set (cu_fp, _CTF_SECTION);
     }
   return cu_fp;
@@ -440,93 +434,16 @@  typedef struct ctf_link_in_member_cb_arg
   /* The shared output dictionary.  */
   ctf_dict_t *out_fp;
 
-  /* The filename of the input file, and an fp to each dictionary in that file
+  /* The cuname of the input file, and an fp to each dictionary in that file
      in turn.  */
-  const char *in_file_name;
+  const char *in_cuname;
   ctf_dict_t *in_fp;
 
-  /* The CU name of the dict being processed.  */
-  const char *cu_name;
-  int in_input_cu_file;
-
-  /* The parent dictionary in the input, and whether it's been processed yet.
-     Not needed by ctf_link_one_type / ctf_link_one_variable, only by higher
-     layers.  */
-  ctf_dict_t *in_fp_parent;
-  int done_parent;
-
   /* If true, this is the CU-mapped portion of a deduplicating link: no child
      dictionaries should be created.  */
   int cu_mapped;
 } ctf_link_in_member_cb_arg_t;
 
-/* Link one type into the link.  We rely on ctf_add_type() to detect
-   duplicates.  This is not terribly reliable yet (unnmamed types will be
-   mindlessly duplicated), but will improve shortly.  */
-
-static int
-ctf_link_one_type (ctf_id_t type, int isroot _libctf_unused_, void *arg_)
-{
-  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
-  ctf_dict_t *per_cu_out_fp;
-  int err;
-
-  if (arg->in_fp->ctf_link_flags != CTF_LINK_SHARE_UNCONFLICTED)
-    {
-      ctf_err_warn (arg->out_fp, 0, ECTF_NOTYET,
-		    _("share-duplicated mode not yet implemented"));
-      return ctf_set_errno (arg->out_fp, ECTF_NOTYET);
-    }
-
-  /* Simply call ctf_add_type: if it reports a conflict and we're adding to the
-     main CTF file, add to the per-CU archive member instead, creating it if
-     necessary.  If we got this type from a per-CU archive member, add it
-     straight back to the corresponding member in the output.  */
-
-  if (!arg->in_input_cu_file)
-    {
-      if (ctf_add_type (arg->out_fp, arg->in_fp, type) != CTF_ERR)
-	return 0;
-
-      err = ctf_errno (arg->out_fp);
-      if (err != ECTF_CONFLICT)
-	{
-	  if (err != ECTF_NONREPRESENTABLE)
-	    ctf_err_warn (arg->out_fp, 1, 0,
-			  _("cannot link type %lx from input file %s, CU %s "
-			    "into output link"), type, arg->cu_name,
-			  arg->in_file_name);
-	  /* We must ignore this problem or we end up losing future types, then
-	     trying to link the variables in, then exploding.  Better to link as
-	     much as possible.  */
-	  return 0;
-	}
-      ctf_set_errno (arg->out_fp, 0);
-    }
-
-  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->in_file_name,
-					  arg->cu_name)) == NULL)
-    return -1;					/* Errno is set for us.  */
-
-  if (ctf_add_type (per_cu_out_fp, arg->in_fp, type) != CTF_ERR)
-    return 0;
-
-  err = ctf_errno (per_cu_out_fp);
-  if (err != ECTF_NONREPRESENTABLE)
-    ctf_err_warn (arg->out_fp, 1, 0,
-		  _("cannot link type %lx from input file %s, CU %s "
-		    "into output per-CU CTF archive member %s: %s: skipped"),
-		  type, ctf_link_input_name (arg->in_fp), arg->in_file_name,
-		  ctf_link_input_name (per_cu_out_fp), ctf_errmsg (err));
-  if (err == ECTF_CONFLICT)
-      /* Conflicts are possible at this stage only if a non-ld user has combined
-	 multiple TUs into a single output dictionary.  Even in this case we do not
-	 want to stop the link or propagate the error.  */
-      ctf_set_errno (arg->out_fp, 0);
-
-  return 0;					/* As above: do not lose types.  */
-}
-
 /* Set a function which is used to filter out unwanted variables from the link.  */
 int
 ctf_link_set_variable_filter (ctf_dict_t *fp, ctf_link_variable_filter_f *filter,
@@ -615,13 +532,12 @@  ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_)
   if (arg->cu_mapped)
     {
       ctf_dprintf ("Variable %s in input file %s depends on a type %lx hidden "
-		   "due to conflicts: skipped.\n", name, arg->in_file_name,
+		   "due to conflicts: skipped.\n", name, arg->in_cuname,
 		   type);
       return 0;
     }
 
-  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->in_file_name,
-					  arg->cu_name)) == NULL)
+  if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->in_cuname)) == NULL)
     return -1;					/* Errno is set for us.  */
 
   /* If the type was not found, check for it in the child too.  */
@@ -635,7 +551,7 @@  ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_)
 	  ctf_err_warn (arg->out_fp, 1, 0,
 			_("type %lx for variable %s in input file %s "
 			  "not found: skipped"), type, name,
-			arg->in_file_name);
+			arg->in_cuname);
 	  /* Do not terminate the link: just skip the variable.  */
 	  return 0;
 	}
@@ -647,55 +563,6 @@  ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_)
   return 0;
 }
 
-/* Merge every type (and optionally, variable) in this archive member into the
-   link, so we can relink things that have already had ld run on them.  We use
-   the archive member name, sans any leading '.ctf.', as the CU name for
-   ambiguous types if there is one and it's not the default: otherwise, we use
-   the name of the input file.  */
-static int
-ctf_link_one_input_archive_member (ctf_dict_t *in_fp, const char *name, void *arg_)
-{
-  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
-  int err = 0;
-
-  if (strcmp (name, _CTF_SECTION) == 0)
-    {
-      /* This file is the default member of this archive, and has already been
-	 explicitly processed.
-
-	 In the default sharing mode of CTF_LINK_SHARE_UNCONFLICTED, it does no
-	 harm to rescan an existing shared repo again: all the types will just
-	 end up in the same place.  But in CTF_LINK_SHARE_DUPLICATED mode, this
-	 causes the system to erroneously conclude that all types are duplicated
-	 and should be shared, even if they are not.  */
-
-      if (arg->done_parent)
-	return 0;
-    }
-  else
-    {
-      /* Get ambiguous types from our parent.  */
-      ctf_import (in_fp, arg->in_fp_parent);
-      arg->in_input_cu_file = 1;
-    }
-
-  arg->cu_name = name;
-  if (strncmp (arg->cu_name, ".ctf.", strlen (".ctf.")) == 0)
-    arg->cu_name += strlen (".ctf.");
-  arg->in_fp = in_fp;
-
-  if ((err = ctf_type_iter_all (in_fp, ctf_link_one_type, arg)) > -1)
-    if (!(in_fp->ctf_link_flags & CTF_LINK_OMIT_VARIABLES_SECTION))
-      err = ctf_variable_iter (in_fp, ctf_link_one_variable, arg);
-
-  arg->in_input_cu_file = 0;
-
-  if (err < 0)
-      return -1;				/* Errno is set for us.  */
-
-  return 0;
-}
-
 /* Dump the unnecessary link type mapping after one input file is processed.  */
 static void
 empty_link_type_mapping (void *key _libctf_unused_, void *value,
@@ -753,73 +620,6 @@  ctf_link_lazy_open (ctf_dict_t *fp, ctf_link_input_t *input)
   return (ssize_t) count;
 }
 
-/* Close an input, as a ctf_dynhash_iter iterator.  */
-static void
-ctf_link_close_one_input_archive (void *key _libctf_unused_, void *value,
-				  void *arg _libctf_unused_)
-{
-  ctf_link_input_t *input = (ctf_link_input_t *) value;
-  if (input->clin_arc)
-    ctf_arc_close (input->clin_arc);
-  input->clin_arc = NULL;
-}
-
-/* Link one input file's types into the output file.  */
-static void
-ctf_link_one_input_archive (void *key, void *value, void *arg_)
-{
-  const char *file_name = (const char *) key;
-  ctf_link_input_t *input = (ctf_link_input_t *)value;
-  ctf_link_in_member_cb_arg_t *arg = (ctf_link_in_member_cb_arg_t *) arg_;
-  int err = 0;
-
-  if (!input->clin_arc)
-    {
-      err = ctf_link_lazy_open (arg->out_fp, input);
-      if (err == 0)				/* Just no CTF.  */
-	return;
-
-      if (err < 0)
-	return;					/* errno is set for us.  */
-    }
-
-  arg->in_file_name = file_name;
-  arg->done_parent = 0;
-  if ((arg->in_fp_parent = ctf_dict_open (input->clin_arc,
-					  NULL, &err)) == NULL)
-    if (err != ECTF_ARNNAME)
-      {
-	ctf_err_warn (arg->out_fp, 1, 0,
-		      _("cannot open main archive member in input file %s "
-			"in the link: skipping: %s"), arg->in_file_name,
-		      ctf_errmsg (err));
-	goto out;
-      }
-
-  if (ctf_link_one_input_archive_member (arg->in_fp_parent,
-					 _CTF_SECTION, arg) < 0)
-    {
-      ctf_dict_close (arg->in_fp_parent);
-      goto out;
-    }
-  arg->done_parent = 1;
-  if (ctf_archive_iter (input->clin_arc, ctf_link_one_input_archive_member,
-			arg) < 0)
-    ctf_err_warn (arg->out_fp, 0, 0, _("cannot traverse archive in input file "
-				       "%s: link cannot continue"),
-		  arg->in_file_name);
-  else
-    {
-      /* The only error indication to the caller is the errno: so ensure that it
-	 is zero if there was no actual error from the caller.  */
-      ctf_set_errno (arg->out_fp, 0);
-    }
-  ctf_dict_close (arg->in_fp_parent);
-
- out:
-  ctf_link_close_one_input_archive (key, value, NULL);
-}
-
 typedef struct link_sort_inputs_cb_arg
 {
   int is_cu_mapped;
@@ -1130,21 +930,16 @@  ctf_link_deduplicating_variables (ctf_dict_t *fp, ctf_dict_t **inputs,
 
   arg.cu_mapped = cu_mapped;
   arg.out_fp = fp;
-  arg.in_input_cu_file = 0;
 
   for (i = 0; i < ninputs; i++)
     {
       arg.in_fp = inputs[i];
       if (ctf_cuname (inputs[i]) != NULL)
-	arg.in_file_name = ctf_cuname (inputs[i]);
+	arg.in_cuname = ctf_cuname (inputs[i]);
       else
-	arg.in_file_name = "unnamed-CU";
-      arg.cu_name = arg.in_file_name;
+	arg.in_cuname = "unnamed-CU";
       if (ctf_variable_iter (arg.in_fp, ctf_link_one_variable, &arg) < 0)
 	return ctf_set_errno (fp, ctf_errno (arg.in_fp));
-
-      /* Outputs > 0 are per-CU.  */
-      arg.in_input_cu_file = 1;
     }
   return 0;
 }
@@ -1237,8 +1032,7 @@  ctf_link_deduplicating_one_symtypetab (ctf_dict_t *fp, ctf_dict_t *input,
 	  continue;
 	}
 
-      if ((per_cu_out_fp = ctf_create_per_cu (fp, in_file_name,
-					      in_file_name)) == NULL)
+      if ((per_cu_out_fp = ctf_create_per_cu (fp, in_file_name)) == NULL)
 	return -1;				/* errno is set for us.  */
 
       /* If the type was not found, check for it in the child too.  */
@@ -1653,8 +1447,8 @@  ctf_link_deduplicating (ctf_dict_t *fp)
   goto err;
 }
 
-/* Merge types and variable sections in all files added to the link
-   together.  All the added files are closed.  */
+/* Merge types and variable sections in all dicts added to the link together.
+   All the added dicts are closed.  */
 int
 ctf_link (ctf_dict_t *fp, int flags)
 {
@@ -1691,7 +1485,7 @@  ctf_link (ctf_dict_t *fp, int flags)
 				      NULL)) == 0)
 	{
 	  const char *to = (const char *) v;
-	  if (ctf_create_per_cu (fp, to, to) == NULL)
+	  if (ctf_create_per_cu (fp, to) == NULL)
 	    {
 	      fp->ctf_flags &= ~LCTF_LINKING;
 	      ctf_next_destroy (i);
@@ -1707,11 +1501,7 @@  ctf_link (ctf_dict_t *fp, int flags)
 	}
     }
 
-  if ((flags & CTF_LINK_NONDEDUP) || (getenv ("LD_NO_CTF_DEDUP")))
-    ctf_dynhash_iter (fp->ctf_link_inputs, ctf_link_one_input_archive,
-		      &arg);
-  else
-    ctf_link_deduplicating (fp);
+  ctf_link_deduplicating (fp);
 
   /* Discard the now-unnecessary mapping table data from all the outputs.  */
   if (fp->ctf_link_type_mapping)