read_leb128 overflow checking

Message ID 20210217063925.GA5511@bubble.grove.modra.org
State New
Headers show
Series
  • read_leb128 overflow checking
Related show

Commit Message

Nick Clifton via Binutils Feb. 17, 2021, 6:39 a.m.
There is a tiny error left in dwarf.c:read_leb128 after Nick fixed the
signed overflow problem in code I wrote.  It's to do with sleb128
values that have unnecessary excess bytes.  For example, -1 is
represented as 0x7f, the most efficient encoding, but also as
0xff,0x7f or 0xff,0xff,0x7f and so on.  None of these sequences
overflow any size signed integer, but read_leb128 will report an
overflow given enough excess bytes.  This patch fixes that problem,
and since the proper test for signed values with excess bytes can
easily be adapted to also test a sleb byte with just some bits that
overflow the result, I changed the code to not use signed right
shifts.  (The C standard ISO/IEC 9899:1999 6.5.7 says signed right
shifts of negative values have an implementation defined value.  A
long time ago I even used a C compiler for a certain microprocessor
that always did unsigned right shifts.  Mind you, it is very unlikely
to be compiling binutils with such a compiler.)

bfd/
	* wasm-module.c: Guard include of limits.h.
	(CHAR_BIT): Provide backup define.
	(wasm_read_leb128): Use CHAR_BIT to size "result" in bits.
	Correct signed overflow checking.
opcodes/
	* wasm32-dis.c: Include limits.h.
	(CHAR_BIT): Provide backup define.
	(wasm_read_leb128): Use CHAR_BIT to size "result" in bits.
	Correct signed overflow checking.
binutils/
	* dwarf.c: Include limits.h.
	(CHAR_BIT): Provide backup define.
	(read_leb128): Use CHAR_BIT to size "result" in bits.  Correct
	signed overflow checking.
	* testsuite/binutils-all/pr26548.s,
	* testsuite/binutils-all/pr26548.d,
	* testsuite/binutils-all/pr26548e.d: New tests.
	* testsuite/binutils-all/readelf.exp: Run them.
	(readelf_test): Drop unused "xfails" parameter.  Update all uses.


-- 
Alan Modra
Australia Development Lab, IBM

Patch

diff --git a/bfd/wasm-module.c b/bfd/wasm-module.c
index dc135645f4..a8f65479af 100644
--- a/bfd/wasm-module.c
+++ b/bfd/wasm-module.c
@@ -28,11 +28,17 @@ 
 #include "sysdep.h"
 #include "alloca-conf.h"
 #include "bfd.h"
-#include <limits.h>
 #include "libiberty.h"
 #include "libbfd.h"
 #include "wasm-module.h"
 
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifndef CHAR_BIT
+#define CHAR_BIT 8
+#endif
+
 typedef struct
 {
   asymbol *      symbols;
@@ -111,27 +117,34 @@  wasm_read_leb128 (bfd *		  abfd,
   unsigned int num_read = 0;
   unsigned int shift = 0;
   unsigned char byte = 0;
+  unsigned char lost, mask;
   int status = 1;
 
   while (bfd_bread (&byte, 1, abfd) == 1)
     {
       num_read++;
 
-      if (shift < sizeof (result) * 8)
+      if (shift < CHAR_BIT * sizeof (result))
 	{
 	  result |= ((bfd_vma) (byte & 0x7f)) << shift;
-	  if ((result >> shift) != (byte & 0x7f))
-	    /* Overflow.  */
-	    status |= 2;
+	  /* These bits overflowed.  */
+	  lost = byte ^ (result >> shift);
+	  /* And this is the mask of possible overflow bits.  */
+	  mask = 0x7f ^ ((bfd_vma) 0x7f << shift >> shift);
 	  shift += 7;
 	}
-      else if ((byte & 0x7f) != 0)
+      else
+	{
+	  lost = byte;
+	  mask = 0x7f;
+	}
+      if ((lost & mask) != (sign && (bfd_signed_vma) result < 0 ? mask : 0))
 	status |= 2;
 
       if ((byte & 0x80) == 0)
 	{
 	  status &= ~1;
-	  if (sign && (shift < 8 * sizeof (result)) && (byte & 0x40))
+	  if (sign && shift < CHAR_BIT * sizeof (result) && (byte & 0x40))
 	    result |= -((bfd_vma) 1 << shift);
 	  break;
 	}
diff --git a/binutils/dwarf.c b/binutils/dwarf.c
index a69d110665..ce2602b6cb 100644
--- a/binutils/dwarf.c
+++ b/binutils/dwarf.c
@@ -36,6 +36,13 @@ 
 #include <elfutils/debuginfod.h>
 #endif
 
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifndef CHAR_BIT
+#define CHAR_BIT 8
+#endif
+
 #undef MAX
 #undef MIN
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -326,7 +333,7 @@  dwarf_vmatoa64 (dwarf_vma hvalue, dwarf_vma lvalue, char *buf,
 /* Read in a LEB128 encoded value starting at address DATA.
    If SIGN is true, return a signed LEB128 value.
    If LENGTH_RETURN is not NULL, return in it the number of bytes read.
-   If STATUS_RETURN in not NULL, return with bit 0 (LSB) set if the
+   If STATUS_RETURN is not NULL, return with bit 0 (LSB) set if the
    terminating byte was not found and with bit 1 set if the value
    overflows a dwarf_vma.
    No bytes will be read at address END or beyond.  */
@@ -346,37 +353,31 @@  read_leb128 (unsigned char *data,
   while (data < end)
     {
       unsigned char byte = *data++;
-      bfd_boolean cont = (byte & 0x80) ? TRUE : FALSE;
+      unsigned char lost, mask;
 
-      byte &= 0x7f;
       num_read++;
 
-      if (shift < sizeof (result) * 8)
+      if (shift < CHAR_BIT * sizeof (result))
 	{
-	  result |= ((dwarf_vma) byte) << shift;
-	  if (sign)
-	    {
-	      if ((((dwarf_signed_vma) result >> shift) & 0x7f) != byte)
-		/* Overflow.  */
-		status |= 2;
-	    }
-	  else if ((result >> shift) != byte)
-	    {
-	      /* Overflow.  */
-	      status |= 2;
-	    }
-
+	  result |= ((dwarf_vma) (byte & 0x7f)) << shift;
+	  /* These bits overflowed.  */
+	  lost = byte ^ (result >> shift);
+	  /* And this is the mask of possible overflow bits.  */
+	  mask = 0x7f ^ ((dwarf_vma) 0x7f << shift >> shift);
 	  shift += 7;
 	}
-      else if (byte != 0)
+      else
 	{
-	  status |= 2;
+	  lost = byte;
+	  mask = 0x7f;
 	}
+      if ((lost & mask) != (sign && (dwarf_signed_vma) result < 0 ? mask : 0))
+	status |= 2;
 
-      if (!cont)
+      if ((byte & 0x80) == 0)
 	{
 	  status &= ~1;
-	  if (sign && (shift < 8 * sizeof (result)) && (byte & 0x40))
+	  if (sign && shift < CHAR_BIT * sizeof (result) && (byte & 0x40))
 	    result |= -((dwarf_vma) 1 << shift);
 	  break;
 	}
diff --git a/binutils/testsuite/binutils-all/pr26548.d b/binutils/testsuite/binutils-all/pr26548.d
new file mode 100644
index 0000000000..8f6e3a5f5a
--- /dev/null
+++ b/binutils/testsuite/binutils-all/pr26548.d
@@ -0,0 +1,13 @@ 
+#source: pr26548.s
+#as:
+#readelf: -Wwi
+
+#...
+.*: Abbrev Number: 1 \(DW_TAG_variable\)
+.*   DW_AT_const_value : \(sdata\) 9223372036854775807
+.*: Abbrev Number: 1 \(DW_TAG_variable\)
+.*   DW_AT_const_value : \(sdata\) -9223372036854775808
+.*: Abbrev Number: 1 \(DW_TAG_variable\)
+.*   DW_AT_const_value : \(sdata\) -1
+.*: Abbrev Number: 1 \(DW_TAG_variable\)
+.*   DW_AT_const_value : \(sdata\) 1
diff --git a/binutils/testsuite/binutils-all/pr26548.s b/binutils/testsuite/binutils-all/pr26548.s
new file mode 100644
index 0000000000..73b729a88e
--- /dev/null
+++ b/binutils/testsuite/binutils-all/pr26548.s
@@ -0,0 +1,40 @@ 
+	.section .debug_abbrev
+1:
+	.uleb128 1
+# DW_TAG_variable
+	.uleb128 0x34
+# no children
+	.byte 0
+# DW_AT_const_value
+	.uleb128 0x1c
+# DW_FORM_sdata
+	.uleb128 0x0d
+	.byte 0,0
+	.byte 0
+
+	.section .debug_info
+	.4byte 9f-0f
+0:
+	.2byte 4
+	.4byte 1b
+	.byte 8
+ .ifndef ERROR
+	.uleb128 1
+	.sleb128  0x7fffffffffffffff
+	.uleb128 1
+	.sleb128 -0x8000000000000000
+	.uleb128 1
+# silly excess byte encoding of -1, no warning
+	.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f
+	.uleb128 1
+# silly excess byte encoding of 1, no warning
+	.byte 0x81,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0
+ .else
+	.uleb128 1
+# encode +0x8000000000000000, readelf warning
+	.byte 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x01
+	.uleb128 1
+# encode -0x8000000000000001, readelf warning
+	.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e
+ .endif
+9:
diff --git a/binutils/testsuite/binutils-all/pr26548e.d b/binutils/testsuite/binutils-all/pr26548e.d
new file mode 100644
index 0000000000..3d5e64daa2
--- /dev/null
+++ b/binutils/testsuite/binutils-all/pr26548e.d
@@ -0,0 +1,11 @@ 
+#source: pr26548.s
+#as: --defsym ERROR=1
+#readelf: -Wwi
+
+#...
+.*: Abbrev Number: 1 \(DW_TAG_variable\)
+.*   DW_AT_const_value : \(sdata\).*LEB value.*
+ -9223372036854775808
+.*: Abbrev Number: 1 \(DW_TAG_variable\)
+.*   DW_AT_const_value : \(sdata\).*LEB value.*
+ 9223372036854775807
diff --git a/binutils/testsuite/binutils-all/readelf.exp b/binutils/testsuite/binutils-all/readelf.exp
index 7116408570..c283cc6bed 100644
--- a/binutils/testsuite/binutils-all/readelf.exp
+++ b/binutils/testsuite/binutils-all/readelf.exp
@@ -76,7 +76,7 @@  proc readelf_find_size { binary_file test_iteration } {
 # Readelf's output is captured and then compared against the contents
 # of the regexp_file-readelf_size if it exists, else regexp_file.
 
-proc readelf_test { options binary_file regexp_file xfails } {
+proc readelf_test { options binary_file regexp_file } {
 
     global READELF
     global READELFFLAGS
@@ -89,10 +89,6 @@  proc readelf_test { options binary_file regexp_file xfails } {
     send_log "exec $READELF $READELFFLAGS $options $binary_file > readelf.out\n"
     set got [remote_exec host "$READELF $READELFFLAGS $options $binary_file" "" "/dev/null" "readelf.out"]
 
-    foreach xfail $xfails {
-	setup_xfail $xfail
-    }
-
     if { [lindex $got 0] != 0 || ![string match "" [lindex $got 1]] } then {
 	fail "$testname (reason: unexpected output)"
 	send_log $got
@@ -342,11 +338,11 @@  if {![binutils_assemble $srcdir/$subdir/bintest.s tmpdir/bintest.o]} then {
     readelf_find_size $tempfile 1
 
     # Run the tests.
-    readelf_test -h $tempfile readelf.h  {}
-    readelf_test -S $tempfile readelf.s  {}
+    readelf_test -h $tempfile readelf.h
+    readelf_test -S $tempfile readelf.s
     setup_xfail "mips-*-*irix*"
-    readelf_test -s $tempfile readelf.ss {}
-    readelf_test -r $tempfile readelf.r  {}
+    readelf_test -s $tempfile readelf.ss
+    readelf_test -r $tempfile readelf.r
 }
 
 # Test demangling symbol names.
@@ -361,7 +357,7 @@  if {![binutils_assemble $srcdir/$subdir/mangled.s tmpdir/mangled.o]} then {
     }
 
     # Run the test.
-    readelf_test {--syms --demangle --wide} $tempfile readelf.demangled {}
+    readelf_test {--syms --demangle --wide} $tempfile readelf.demangled
 }
 
 readelf_wi_test
@@ -376,6 +372,19 @@  if {[which $AS] != 0} then {
     run_dump_test "retain1b"
     run_dump_test "readelf-maskos-1a"
     run_dump_test "readelf-maskos-1b"
+    if {$readelf_size == 64 && ![istarget *-*-hpux*]} then {
+	run_dump_test pr26548
+	if {![binutils_assemble_flags $srcdir/$subdir/pr26548.s tmpdir/pr26548e.o {--defsym ERROR=1}]} then {
+	    unsupported "pr26548e (failed to assemble)"
+	} else {
+	    if ![is_remote host] {
+		set tempfile tmpdir/pr26548e.o
+	    } else {
+		set tempfile [remote_download host tmpdir/pr26548e.o]
+	    }
+	    readelf_test -Wwi $tempfile pr26548e.d
+	}
+    }
 }
 
 # PR 13482 - Check for off-by-one errors when dumping .note sections.
@@ -389,7 +398,7 @@  if {![binutils_assemble $srcdir/$subdir/version.s tmpdir/version.o]} then {
 	set tempfile [remote_download host tmpdir/version.o]
     }
 
-    readelf_test -n $tempfile readelf.n  {}
+    readelf_test -n $tempfile readelf.n
 }
 
 
@@ -405,7 +414,7 @@  if {![binutils_assemble $srcdir/$subdir/pr18374.s tmpdir/pr18374.o]} then {
 	set tempfile [remote_download host tmpdir/pr18374.o]
     }
 
-    readelf_test --debug-dump=loc $tempfile readelf.pr18374  {}
+    readelf_test --debug-dump=loc $tempfile readelf.pr18374
 }
 
 
@@ -420,7 +429,7 @@  if {![binutils_assemble $srcdir/$subdir/locview-1.s tmpdir/locview-1.o]} then {
 	set tempfile [remote_download host tmpdir/locview-1.o]
     }
 
-    readelf_test --debug-dump=loc $tempfile readelf.locview-1  {}
+    readelf_test --debug-dump=loc $tempfile readelf.locview-1
 }
 if {![binutils_assemble $srcdir/$subdir/locview-2.s tmpdir/locview-2.o]} then {
     unsupported "readelf --debug-dump=loc locview-2 (failed to assemble)"
@@ -432,7 +441,7 @@  if {![binutils_assemble $srcdir/$subdir/locview-2.s tmpdir/locview-2.o]} then {
 	set tempfile [remote_download host tmpdir/locview-2.o]
     }
 
-    readelf_test --debug-dump=loc $tempfile readelf.locview-2  {}
+    readelf_test --debug-dump=loc $tempfile readelf.locview-2
 }
 
 
@@ -447,7 +456,7 @@  if {![binutils_assemble $srcdir/$subdir/z.s tmpdir/z.o]} then {
 	set tempfile [remote_download host tmpdir/z.o]
     }
 
-    readelf_test {--decompress --hex-dump .debug_loc} $tempfile readelf.z  {}
+    readelf_test {--decompress --hex-dump .debug_loc} $tempfile readelf.z
 }
 
 # Skip the next test for the RISCV architectures because they
@@ -475,7 +484,7 @@  if ![istarget "riscv*-*-*"] then {
 	readelf_find_size $tempfile 2
 
 	# Make sure that readelf can decode the contents.
-	readelf_test -wiaoRlL $tempfile dw5.W {}
+	readelf_test -wiaoRlL $tempfile dw5.W
     }
 }
 
@@ -494,7 +503,7 @@  if {![binutils_assemble_flags $srcdir/$subdir/dwarf-attributes.S tmpdir/dwarf-at
     readelf_find_size $tempfile 3
 
     # Make sure that readelf can decode the contents.
-    readelf_test -wi $tempfile dwarf-attributes.W {}
+    readelf_test -wi $tempfile dwarf-attributes.W
 }
 
 # Check that debug link sections can be dumped.
@@ -507,7 +516,7 @@  if {![binutils_assemble $srcdir/$subdir/debuglink.s tmpdir/debuglink.o]} then {
 	set tempfile [remote_download host tmpdir/debuglink.o]
     }
 
-    readelf_test {--debug-dump=links -wN} $tempfile readelf.k  {}
+    readelf_test {--debug-dump=links -wN} $tempfile readelf.k
 
     # Check that debug link sections can be followed.
     if {![binutils_assemble $srcdir/$subdir/linkdebug.s tmpdir/linkdebug.debug]} then {
@@ -517,7 +526,7 @@  if {![binutils_assemble $srcdir/$subdir/debuglink.s tmpdir/debuglink.o]} then {
 	    set tempfile2 [remote_download host tmpdir/linkdebug.debug]
 	}
 
-	readelf_test {-wKis} $tempfile readelf.wKis  {}
+	readelf_test {-wKis} $tempfile readelf.wKis
     }
 }
 
@@ -530,7 +539,7 @@  if {![binutils_assemble $srcdir/$subdir/dwo.s tmpdir/dwo.o]} then {
 	set tempfile [remote_download host tmpdir/dwo.o]
     }
 
-    readelf_test {--debug-dump=links --debug-dump=no-follow-links} $tempfile readelf.k2  {}
+    readelf_test {--debug-dump=links --debug-dump=no-follow-links} $tempfile readelf.k2
 }
 
 if {![binutils_assemble $srcdir/$subdir/zero-sec.s tmpdir/zero-sec.o]} then {
@@ -542,7 +551,7 @@  if {![binutils_assemble $srcdir/$subdir/zero-sec.s tmpdir/zero-sec.o]} then {
 	set tempfile [remote_download host tmpdir/zero-sec.o]
     }
 
-    readelf_test {--enable-checks --sections --wide} $tempfile zero-sec.r {}
+    readelf_test {--enable-checks --sections --wide} $tempfile zero-sec.r
 }
 
 if ![is_remote host] {
@@ -555,6 +564,6 @@  if ![is_remote host] {
     if {[catch "system \"bzip2 -dc $test > $tempfile\""] != 0} {
 	untested "bzip2 -dc ($testname)"
     } else {
-	readelf_test {--debug-dump=macro -wN} $tempfile pr26112.r {}
+	readelf_test {--debug-dump=macro -wN} $tempfile pr26112.r
     }
 }
diff --git a/opcodes/wasm32-dis.c b/opcodes/wasm32-dis.c
index 727a49e120..2fe5132e12 100644
--- a/opcodes/wasm32-dis.c
+++ b/opcodes/wasm32-dis.c
@@ -29,6 +29,13 @@ 
 #include "elf/wasm32.h"
 #include "bfd_stdint.h"
 
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifndef CHAR_BIT
+#define CHAR_BIT 8
+#endif
+
 /* Type names for blocks and signatures.  */
 #define BLOCK_TYPE_NONE              0x40
 #define BLOCK_TYPE_I32               0x7f
@@ -192,27 +199,34 @@  wasm_read_leb128 (bfd_vma                   pc,
   unsigned int num_read = 0;
   unsigned int shift = 0;
   unsigned char byte = 0;
+  unsigned char lost, mask;
   int status = 1;
 
   while (info->read_memory_func (pc + num_read, &byte, 1, info) == 0)
     {
       num_read++;
 
-      if (shift < sizeof (result) * 8)
+      if (shift < CHAR_BIT * sizeof (result))
 	{
 	  result |= ((uint64_t) (byte & 0x7f)) << shift;
-	  if ((result >> shift) != (byte & 0x7f))
-	    /* Overflow.  */
-	    status |= 2;
+	  /* These bits overflowed.  */
+	  lost = byte ^ (result >> shift);
+	  /* And this is the mask of possible overflow bits.  */
+	  mask = 0x7f ^ ((uint64_t) 0x7f << shift >> shift);
 	  shift += 7;
 	}
-      else if ((byte & 0x7f) != 0)
+      else
+	{
+	  lost = byte;
+	  mask = 0x7f;
+	}
+      if ((lost & mask) != (sign && (int64_t) result < 0 ? mask : 0))
 	status |= 2;
 
       if ((byte & 0x80) == 0)
 	{
 	  status &= ~1;
-	  if (sign && (shift < 8 * sizeof (result)) && (byte & 0x40))
+	  if (sign && shift < CHAR_BIT * sizeof (result) && (byte & 0x40))
 	    result |= -((uint64_t) 1 << shift);
 	  break;
 	}