[11/16] libctf: do not corrupt strings across ctf_serialize

Message ID 20210306004023.164154-12-nick.alcock@oracle.com
State New
Headers show
Series
  • libctf: mostly cleanups and refactoring
Related show

Commit Message

Nick Clifton via Binutils March 6, 2021, 12:40 a.m.
The preceding change revealed a new bug: the string table is sorted for
better compression, so repeated serialization with type (or member)
additions in the middle can move strings around.  But every
serialization flushes the set of refs (the memory locations that are
automatically updated with a final string offset when the strtab is
updated), so if we are not to have string offsets go stale, we must do
all ref additions within the serialization code (which walks the
complete set of types and symbols anyway). Unfortunately, we were adding
one ref in another place: the type name in the dynamic type definitions,
which has a ref added to it by ctf_add_generic.

So adding a type, serializing (via, say, one of the ctf_write
functions), adding another type with a name that sorts earlier, and
serializing again will corrupt the name of the first type because it no
longer had a ref pointing to its dtd entry's name when its string offset
was shifted later in the strtab to make way for the other type.

To ensure that we don't miss strings, we also maintain a set of *pending
refs* that will be added later (during serialization), and remove
entries from that set when the ref is finally added.  We always use
ctf_str_add_pending outside ctf-serialize.c, ensure that ctf_serialize
adds all strtab offsets as refs (even those in the dtds) on every
serialization, and mandate that no refs are live on entry to
ctf_serialize and that all pending refs are gone before strtab
finalization.  (Of necessity ctf_serialize has to traverse all strtab
offsets in the dtds in order to serialize them, so adding them as refs
at the same time is easy.)

(Note that we still can't erase unused atoms when we roll back, though
we can erase unused refs: members and enums are still not removed by
rollbacks and might reference strings added after the snapshot.)

libctf/ChangeLog
2021-03-03  Nick Alcock  <nick.alcock@oracle.com>

	* ctf-hash.c (ctf_dynset_elements): New.
	* ctf-impl.h (ctf_dynset_elements): Declare it.
	(ctf_str_add_pending): Likewise.
	(ctf_dict_t) <ctf_str_pending_ref>: New, set of refs that must be
	added during serialization.
	* ctf-string.c (ctf_str_create_atoms): Initialize it.
	(CTF_STR_ADD_REF): New flag.
	(CTF_STR_MAKE_PROVISIONAL): Likewise.
	(CTF_STR_PENDING_REF): Likewise.
	(ctf_str_add_ref_internal): Take a flags word rather than int
	params.  Populate, and clear out, ctf_str_pending_ref.
	(ctf_str_add): Adjust accordingly.
	(ctf_str_add_external): Likewise.
	(ctf_str_add_pending): New.
	(ctf_str_remove_ref): Also remove the potential ref if it is a
	pending ref.
	* ctf-serialize.c (ctf_serialize): Prohibit addition of strings
	with ctf_str_add_ref before serialization.  Ensure that the
	ctf_str_pending_ref set is empty before strtab finalization.
	(ctf_emit_type_sect): Add a ref to the ctt_name.
	* ctf-create.c (ctf_add_generic): Add the ctt_name as a pending
	ref.
	* testsuite/libctf-writable/reserialize-strtab-corruption.*: New test.
---
 libctf/ctf-create.c                           |  3 +-
 libctf/ctf-hash.c                             |  6 ++
 libctf/ctf-impl.h                             |  5 +-
 libctf/ctf-serialize.c                        | 24 ++++-
 libctf/ctf-string.c                           | 61 +++++++++++--
 .../reserialize-strtab-corruption.c           | 91 +++++++++++++++++++
 .../reserialize-strtab-corruption.lk          |  5 +
 7 files changed, 183 insertions(+), 12 deletions(-)
 create mode 100644 libctf/testsuite/libctf-writable/reserialize-strtab-corruption.c
 create mode 100644 libctf/testsuite/libctf-writable/reserialize-strtab-corruption.lk

-- 
2.30.0.252.gc27e85e57d

Patch

diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index 6acc2428cd7..ea2c1481b6c 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -439,7 +439,8 @@  ctf_add_generic (ctf_dict_t *fp, uint32_t flag, const char *name, int kind,
   type = ++fp->ctf_typemax;
   type = LCTF_INDEX_TO_TYPE (fp, type, (fp->ctf_flags & LCTF_CHILD));
 
-  dtd->dtd_data.ctt_name = ctf_str_add_ref (fp, name, &dtd->dtd_data.ctt_name);
+  dtd->dtd_data.ctt_name = ctf_str_add_pending (fp, name,
+						&dtd->dtd_data.ctt_name);
   dtd->dtd_type = type;
 
   if (dtd->dtd_data.ctt_name == 0 && name != NULL && name[0] != '\0')
diff --git a/libctf/ctf-hash.c b/libctf/ctf-hash.c
index 62f3dde3465..426bd625ffb 100644
--- a/libctf/ctf-hash.c
+++ b/libctf/ctf-hash.c
@@ -672,6 +672,12 @@  ctf_dynset_lookup (ctf_dynset_t *hp, const void *key)
   return NULL;
 }
 
+size_t
+ctf_dynset_elements (ctf_dynset_t *hp)
+{
+  return htab_elements ((struct htab *) hp);
+}
+
 /* TRUE/FALSE return.  */
 int
 ctf_dynset_exists (ctf_dynset_t *hp, const void *key, const void **orig_key)
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 7a4e418ce2d..a319d7f74a2 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -398,7 +398,8 @@  struct ctf_dict
   ctf_names_t ctf_names;	    /* Hash table of remaining type names.  */
   ctf_lookup_t ctf_lookups[5];	    /* Pointers to nametabs for name lookup.  */
   ctf_strs_t ctf_str[2];	    /* Array of string table base and bounds.  */
-  ctf_dynhash_t *ctf_str_atoms;	  /* Hash table of ctf_str_atoms_t.  */
+  ctf_dynhash_t *ctf_str_atoms;	    /* Hash table of ctf_str_atoms_t.  */
+  ctf_dynset_t *ctf_str_pending_ref; /* Locations awaiting ref addition.  */
   uint64_t ctf_str_num_refs;	  /* Number of refs to cts_str_atoms.  */
   uint32_t ctf_str_prov_offset;	  /* Latest provisional offset assigned so far.  */
   unsigned char *ctf_base;	  /* CTF file pointer.  */
@@ -673,6 +674,7 @@  extern int ctf_dynset_insert (ctf_dynset_t *, void *);
 extern void ctf_dynset_remove (ctf_dynset_t *, const void *);
 extern void ctf_dynset_destroy (ctf_dynset_t *);
 extern void *ctf_dynset_lookup (ctf_dynset_t *, const void *);
+extern size_t ctf_dynset_elements (ctf_dynset_t *);
 extern int ctf_dynset_exists (ctf_dynset_t *, const void *key,
 			      const void **orig_key);
 extern int ctf_dynset_next (ctf_dynset_t *, ctf_next_t **, void **key);
@@ -731,6 +733,7 @@  extern int ctf_str_create_atoms (ctf_dict_t *);
 extern void ctf_str_free_atoms (ctf_dict_t *);
 extern uint32_t ctf_str_add (ctf_dict_t *, const char *);
 extern uint32_t ctf_str_add_ref (ctf_dict_t *, const char *, uint32_t *ref);
+extern uint32_t ctf_str_add_pending (ctf_dict_t *, const char *, uint32_t *);
 extern int ctf_str_add_external (ctf_dict_t *, const char *, uint32_t offset);
 extern void ctf_str_remove_ref (ctf_dict_t *, const char *, uint32_t *ref);
 extern void ctf_str_rollback (ctf_dict_t *, ctf_snapshot_id_t);
diff --git a/libctf/ctf-serialize.c b/libctf/ctf-serialize.c
index 17f11f67ffb..0eff0e197fa 100644
--- a/libctf/ctf-serialize.c
+++ b/libctf/ctf-serialize.c
@@ -870,7 +870,10 @@  ctf_emit_type_sect (ctf_dict_t *fp, unsigned char **tptr)
       copied = (ctf_stype_t *) t;  /* name is at the start: constant offset.  */
       if (copied->ctt_name
 	  && (name = ctf_strraw (fp, copied->ctt_name)) != NULL)
-	ctf_str_add_ref (fp, name, &copied->ctt_name);
+	{
+	  ctf_str_add_ref (fp, name, &copied->ctt_name);
+	  ctf_str_add_ref (fp, name, &dtd->dtd_data.ctt_name);
+	}
       t += len;
 
       switch (kind)
@@ -955,6 +958,7 @@  ctf_serialize (ctf_dict_t *fp)
   ctf_varent_t *dvarents;
   ctf_strs_writable_t strtab;
   int err;
+  int num_missed_str_refs;
 
   unsigned char *t;
   unsigned long i;
@@ -973,6 +977,16 @@  ctf_serialize (ctf_dict_t *fp)
   if (!(fp->ctf_flags & LCTF_DIRTY))
     return 0;
 
+  /* The strtab refs table must be empty at this stage.  Any refs already added
+     will be corrupted by any modifications, including reserialization, after
+     strtab finalization is complete.  Only this function, and functions it
+     calls, may add refs, and all memory locations (including in the dtds)
+     containing strtab offsets must be traversed as part of serialization, and
+     refs added.  */
+
+  if (!ctf_assert (fp, fp->ctf_str_num_refs == 0))
+    return -1;					/* errno is set for us.  */
+
   /* Fill in an initial CTF header.  We will leave the label, object,
      and function sections empty and only output a header, type section,
      and string table.  The type section begins at a 4-byte aligned
@@ -1052,6 +1066,12 @@  ctf_serialize (ctf_dict_t *fp)
 
   assert (t == (unsigned char *) buf + sizeof (ctf_header_t) + hdr.cth_stroff);
 
+  /* Every string added outside serialization by ctf_str_add_pending should
+     now have been added by ctf_str_add_ref.  */
+  num_missed_str_refs = ctf_dynset_elements (fp->ctf_str_pending_ref);
+  if (!ctf_assert (fp, num_missed_str_refs == 0))
+    goto err;					/* errno is set for us.  */
+
   /* Construct the final string table and fill out all the string refs with the
      final offsets.  Then purge the refs list, because we're about to move this
      strtab onto the end of the buf, invalidating all the offsets.  */
@@ -1154,8 +1174,10 @@  ctf_serialize (ctf_dict_t *fp)
   ctf_str_free_atoms (nfp);
   nfp->ctf_str_atoms = fp->ctf_str_atoms;
   nfp->ctf_prov_strtab = fp->ctf_prov_strtab;
+  nfp->ctf_str_pending_ref = fp->ctf_str_pending_ref;
   fp->ctf_str_atoms = NULL;
   fp->ctf_prov_strtab = NULL;
+  fp->ctf_str_pending_ref = NULL;
   memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t));
   memset (&fp->ctf_errs_warnings, 0, sizeof (ctf_list_t));
   fp->ctf_add_processing = NULL;
diff --git a/libctf/ctf-string.c b/libctf/ctf-string.c
index 91ad2e36db7..9f0e5400141 100644
--- a/libctf/ctf-string.c
+++ b/libctf/ctf-string.c
@@ -103,7 +103,7 @@  ctf_str_create_atoms (ctf_dict_t *fp)
 {
   fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
 					  free, ctf_str_free_atom);
-  if (fp->ctf_str_atoms == NULL)
+  if (!fp->ctf_str_atoms)
     return -ENOMEM;
 
   if (!fp->ctf_prov_strtab)
@@ -113,6 +113,13 @@  ctf_str_create_atoms (ctf_dict_t *fp)
   if (!fp->ctf_prov_strtab)
     goto oom_prov_strtab;
 
+  if (!fp->ctf_str_pending_ref)
+    fp->ctf_str_pending_ref = ctf_dynset_create (htab_hash_pointer,
+						 htab_eq_pointer,
+						 NULL);
+  if (!fp->ctf_str_pending_ref)
+    goto oom_str_pending_ref;
+
   errno = 0;
   ctf_str_add (fp, "");
   if (errno == ENOMEM)
@@ -123,6 +130,9 @@  ctf_str_create_atoms (ctf_dict_t *fp)
  oom_str_add:
   ctf_dynhash_destroy (fp->ctf_prov_strtab);
   fp->ctf_prov_strtab = NULL;
+ oom_str_pending_ref:
+  ctf_dynset_destroy (fp->ctf_str_pending_ref);
+  fp->ctf_str_pending_ref = NULL;
  oom_prov_strtab:
   ctf_dynhash_destroy (fp->ctf_str_atoms);
   fp->ctf_str_atoms = NULL;
@@ -135,8 +145,13 @@  ctf_str_free_atoms (ctf_dict_t *fp)
 {
   ctf_dynhash_destroy (fp->ctf_prov_strtab);
   ctf_dynhash_destroy (fp->ctf_str_atoms);
+  ctf_dynset_destroy (fp->ctf_str_pending_ref);
 }
 
+#define CTF_STR_ADD_REF 0x1
+#define CTF_STR_MAKE_PROVISIONAL 0x2
+#define CTF_STR_PENDING_REF 0x4
+
 /* Add a string to the atoms table, copying the passed-in string.  Return the
    atom added. Return NULL only when out of memory (and do not touch the
    passed-in string in that case).  Possibly augment the ref list with the
@@ -144,7 +159,7 @@  ctf_str_free_atoms (ctf_dict_t *fp)
    provisional strtab.   */
 static ctf_str_atom_t *
 ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
-			  int add_ref, int make_provisional, uint32_t *ref)
+			  int flags, uint32_t *ref)
 {
   char *newstr = NULL;
   ctf_str_atom_t *atom = NULL;
@@ -152,7 +167,7 @@  ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
 
   atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
 
-  if (add_ref)
+  if (flags & CTF_STR_ADD_REF)
     {
       if ((aref = malloc (sizeof (struct ctf_str_atom_ref))) == NULL)
 	return NULL;
@@ -161,8 +176,9 @@  ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
 
   if (atom)
     {
-      if (add_ref)
+      if (flags & CTF_STR_ADD_REF)
 	{
+	  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
 	  ctf_list_append (&atom->csa_refs, aref);
 	  fp->ctf_str_num_refs++;
 	}
@@ -182,7 +198,7 @@  ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
   atom->csa_str = newstr;
   atom->csa_snapshot_id = fp->ctf_snapshots;
 
-  if (make_provisional)
+  if (flags & CTF_STR_MAKE_PROVISIONAL)
     {
       atom->csa_offset = fp->ctf_str_prov_offset;
 
@@ -193,8 +209,14 @@  ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
       fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
     }
 
-  if (add_ref)
+  if (flags & CTF_STR_PENDING_REF)
+    {
+      if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) ref) < 0)
+	goto oom;
+    }
+  else if (flags & CTF_STR_ADD_REF)
     {
+      ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
       ctf_list_append (&atom->csa_refs, aref);
       fp->ctf_str_num_refs++;
     }
@@ -222,7 +244,7 @@  ctf_str_add (ctf_dict_t *fp, const char *str)
   if (!str)
     str = "";
 
-  atom = ctf_str_add_ref_internal (fp, str, FALSE, TRUE, 0);
+  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_MAKE_PROVISIONAL, 0);
   if (!atom)
     return 0;
 
@@ -240,7 +262,26 @@  ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
   if (!str)
     str = "";
 
-  atom = ctf_str_add_ref_internal (fp, str, TRUE, TRUE, ref);
+  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
+				   | CTF_STR_MAKE_PROVISIONAL, ref);
+  if (!atom)
+    return 0;
+
+  return atom->csa_offset;
+}
+
+/* Like ctf_str_add_ref(), but notes that this memory location must be added as
+   a ref by a later serialization phase, rather than adding it itself.  */
+uint32_t
+ctf_str_add_pending (ctf_dict_t *fp, const char *str, uint32_t *ref)
+{
+  ctf_str_atom_t *atom;
+
+  if (!str)
+    str = "";
+
+  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PENDING_REF
+				   | CTF_STR_MAKE_PROVISIONAL, ref);
   if (!atom)
     return 0;
 
@@ -257,7 +298,7 @@  ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
   if (!str)
     str = "";
 
-  atom = ctf_str_add_ref_internal (fp, str, FALSE, FALSE, 0);
+  atom = ctf_str_add_ref_internal (fp, str, 0, 0);
   if (!atom)
     return 0;
 
@@ -307,6 +348,8 @@  ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
 	  free (aref);
 	}
     }
+
+  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
 }
 
 /* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
diff --git a/libctf/testsuite/libctf-writable/reserialize-strtab-corruption.c b/libctf/testsuite/libctf-writable/reserialize-strtab-corruption.c
new file mode 100644
index 00000000000..1593325da77
--- /dev/null
+++ b/libctf/testsuite/libctf-writable/reserialize-strtab-corruption.c
@@ -0,0 +1,91 @@ 
+/* Make sure serializing a dict (possibly repeatedly) does not corrupt either
+   type lookup or the string content of the dict.  */
+
+#include <ctf-api.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *argv[])
+{
+  ctf_dict_t *fp;
+  ctf_id_t zygal, autoschediastic;
+  ctf_snapshot_id_t snap;
+  unsigned char *foo;
+  size_t foo_size;
+  const char *bar;
+  int err;
+  char name[64];
+
+  /* Adding things after serialization should not corrupt names created before
+     serialization.  */
+
+  if ((fp = ctf_create (&err)) == NULL)
+    goto create_err;
+
+  if ((zygal = ctf_add_struct (fp, CTF_ADD_ROOT, "zygal")) == CTF_ERR)
+    goto add_err;
+
+  if ((foo = ctf_write_mem (fp, &foo_size, 4096)) == NULL)
+    goto write_err;
+  free (foo);
+
+  if (ctf_type_name (fp, zygal, name, sizeof (name)) == NULL)
+    fprintf (stderr, "Can't get name of zygal: %s\n", ctf_errmsg (ctf_errno (fp)));
+  else
+    printf ("zygal's name is %s\n", name);
+
+  if ((autoschediastic = ctf_add_enum (fp, CTF_ADD_ROOT, "autoschediastic")) == CTF_ERR)
+    goto add_err;
+
+  if (ctf_type_name (fp, zygal, name, sizeof (name)) == NULL)
+    fprintf (stderr, "Can't get name of zygal: %s\n", ctf_errmsg (ctf_errno (fp)));
+  else
+    printf ("zygal's name is %s\n", name);
+
+  /* Serializing again should not corrupt names either.  */
+  if ((foo = ctf_write_mem (fp, &foo_size, 4096)) == NULL)
+    goto write_err;
+  free (foo);
+
+  if (ctf_type_name (fp, zygal, name, sizeof (name)) == NULL)
+    fprintf (stderr, "Can't get name of zygal: %s\n", ctf_errmsg (ctf_errno (fp)));
+  else
+    printf ("zygal's name is %s\n", name);
+
+  /* Add another new name, roll back, and make sure the strings are
+     uncorrupted.  */
+
+  snap = ctf_snapshot (fp);
+  if (ctf_add_enumerator (fp, autoschediastic, "aichmophobia", 0) < 0)
+    goto add_err;
+
+  if (ctf_rollback (fp, snap) < 0)
+    goto roll_err;
+
+  if (ctf_type_name (fp, zygal, name, sizeof (name)) == NULL)
+    fprintf (stderr, "Can't get name of zygal: %s\n", ctf_errmsg (ctf_errno (fp)));
+  else
+    printf ("zygal's name is %s after first rollback\n", name);
+
+  if (ctf_type_name (fp, autoschediastic, name, sizeof (name)) == NULL)
+    fprintf (stderr, "Can't get name of autoschediastic: %s\n", ctf_errmsg (ctf_errno (fp)));
+  else
+    printf ("autoschediastic's name is %s after first rollback\n", name);
+
+  ctf_dict_close (fp);
+  return 0;
+
+ create_err:
+  fprintf (stderr, "Cannot create: %s\n", ctf_errmsg (err));
+  return 1;
+ add_err:
+  fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (fp)));
+  return 1;
+ write_err:
+  fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (fp)));
+  return 1;
+ roll_err:
+  fprintf (stderr, "Cannot roll back: %s\n", ctf_errmsg (ctf_errno (fp)));
+  return 1;
+}
diff --git a/libctf/testsuite/libctf-writable/reserialize-strtab-corruption.lk b/libctf/testsuite/libctf-writable/reserialize-strtab-corruption.lk
new file mode 100644
index 00000000000..58f7a64a331
--- /dev/null
+++ b/libctf/testsuite/libctf-writable/reserialize-strtab-corruption.lk
@@ -0,0 +1,5 @@ 
+zygal's name is struct zygal
+zygal's name is struct zygal
+zygal's name is struct zygal
+zygal's name is struct zygal after first rollback
+autoschediastic's name is enum autoschediastic after first rollback