Replace hash function from bcache with fast_hash

Message ID 20191203010207.63155-1-cbiesinger@google.com
State New
Headers show
Series
  • Replace hash function from bcache with fast_hash
Related show

Commit Message

Christian Biesinger via gdb-patches Dec. 3, 2019, 1:02 a.m.
This function is not just slower than xxhash, it is slower than
even libiberty's iterative_hash, so there does not seem to be
a reason for it to exist.

------------------------------------------------------------
Benchmark                     Time           CPU Iterations
------------------------------------------------------------
BM_xxh3                      11 ns         11 ns   66127192
BM_xxh32                     19 ns         19 ns   36792609
BM_xxh64                     16 ns         16 ns   42941328
BM_city32                    26 ns         26 ns   27028370
BM_city64                    17 ns         17 ns   40472793
BM_iterative_hash            77 ns         77 ns    9088854
BM_bcache_hash              125 ns        125 ns    5599232

gdb/ChangeLog:

2019-12-02  Christian Biesinger  <cbiesinger@google.com>

	* bcache.c (hash): Remove.
	(hash_continue): Remove.
	* bcache.h (hash): Remove.
	(hash_continue): Remove.
	(struct bcache) <ctor>: Update.
	* psymtab.c (psymbol_hash): Update.
	* stabsread.c (hashname): Update.
	* utils.h (fast_hash): Add an argument for a start value,
	defaulting to zero.

Change-Id: I107f013eda5fdd3293326b5a206be43155dae0f8
---
 gdb/bcache.c    | 25 -------------------------
 gdb/bcache.h    | 11 +++++------
 gdb/psymtab.c   | 11 +++++------
 gdb/stabsread.c |  2 +-
 gdb/utils.h     | 11 ++++++-----
 5 files changed, 17 insertions(+), 43 deletions(-)

-- 
2.24.0.393.g34dc348eaf-goog

Comments

Simon Marchi Dec. 3, 2019, 8:36 p.m. | #1
On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote:
> This function is not just slower than xxhash, it is slower than

> even libiberty's iterative_hash, so there does not seem to be

> a reason for it to exist.

> 

> ------------------------------------------------------------

> Benchmark                     Time           CPU Iterations

> ------------------------------------------------------------

> BM_xxh3                      11 ns         11 ns   66127192

> BM_xxh32                     19 ns         19 ns   36792609

> BM_xxh64                     16 ns         16 ns   42941328

> BM_city32                    26 ns         26 ns   27028370

> BM_city64                    17 ns         17 ns   40472793

> BM_iterative_hash            77 ns         77 ns    9088854

> BM_bcache_hash              125 ns        125 ns    5599232

> 

> gdb/ChangeLog:

> 

> 2019-12-02  Christian Biesinger  <cbiesinger@google.com>

> 

> 	* bcache.c (hash): Remove.

> 	(hash_continue): Remove.

> 	* bcache.h (hash): Remove.

> 	(hash_continue): Remove.

> 	(struct bcache) <ctor>: Update.

> 	* psymtab.c (psymbol_hash): Update.

> 	* stabsread.c (hashname): Update.

> 	* utils.h (fast_hash): Add an argument for a start value,

> 	defaulting to zero.


LGTM, with the nits below fixed.

> diff --git a/gdb/bcache.h b/gdb/bcache.h

> index 15dcc63440..96f6d6813f 100644

> --- a/gdb/bcache.h

> +++ b/gdb/bcache.h

> @@ -138,13 +138,12 @@

>  

>  struct bstring;

>  

> -/* The hash functions */

> -extern unsigned long hash (const void *addr, int length);

> -extern unsigned long hash_continue (const void *addr, int length,

> -                                    unsigned long h);

> -

>  struct bcache

>  {

> +  static unsigned long default_hash (const void *ptr, int length) {


Brace on the next line.

> +    return fast_hash (ptr, length, 0);

> +  }


Can this method be private, just like `compare` is?

> +

>    /* Allocate a bcache.  HASH_FN and COMPARE_FN can be used to pass in

>       custom hash, and compare functions to be used by this bcache.  If

>       HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is


This line of documentation should be updated, probably hash -> fast_hash.

> diff --git a/gdb/utils.h b/gdb/utils.h

> index 79c8a6fc8d..68376dac83 100644

> --- a/gdb/utils.h

> +++ b/gdb/utils.h

> @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,

>  			  const gdb_byte *source, ULONGEST source_offset,

>  			  ULONGEST nbits, int bits_big_endian);

>  

> -/* A fast hashing function.  This can be used to hash strings in a fast way

> +/* A fast hashing function.  This can be used to hash data in a fast way

>     when the length is known.  If no fast hashing library is available, falls

> -   back to iterative_hash from libiberty.  */

> +   back to iterative_hash from libiberty.  START_VALUE can be set to

> +   continue hashing from a previous value.  */

>  

>  static inline unsigned int

> -fast_hash (const char* str, size_t len)

> +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)


- const void* ptr
+ const void *ptr

Simon
Christian Biesinger via gdb-patches Dec. 3, 2019, 9:25 p.m. | #2
On Tue, Dec 3, 2019 at 2:36 PM Simon Marchi <simark@simark.ca> wrote:
>

> On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote:

> > This function is not just slower than xxhash, it is slower than

> > even libiberty's iterative_hash, so there does not seem to be

> > a reason for it to exist.

> >

> > ------------------------------------------------------------

> > Benchmark                     Time           CPU Iterations

> > ------------------------------------------------------------

> > BM_xxh3                      11 ns         11 ns   66127192

> > BM_xxh32                     19 ns         19 ns   36792609

> > BM_xxh64                     16 ns         16 ns   42941328

> > BM_city32                    26 ns         26 ns   27028370

> > BM_city64                    17 ns         17 ns   40472793

> > BM_iterative_hash            77 ns         77 ns    9088854

> > BM_bcache_hash              125 ns        125 ns    5599232

> >

> > gdb/ChangeLog:

> >

> > 2019-12-02  Christian Biesinger  <cbiesinger@google.com>

> >

> >       * bcache.c (hash): Remove.

> >       (hash_continue): Remove.

> >       * bcache.h (hash): Remove.

> >       (hash_continue): Remove.

> >       (struct bcache) <ctor>: Update.

> >       * psymtab.c (psymbol_hash): Update.

> >       * stabsread.c (hashname): Update.

> >       * utils.h (fast_hash): Add an argument for a start value,

> >       defaulting to zero.

>

> LGTM, with the nits below fixed.

>

> > diff --git a/gdb/bcache.h b/gdb/bcache.h

> > index 15dcc63440..96f6d6813f 100644

> > --- a/gdb/bcache.h

> > +++ b/gdb/bcache.h

> > @@ -138,13 +138,12 @@

> >

> >  struct bstring;

> >

> > -/* The hash functions */

> > -extern unsigned long hash (const void *addr, int length);

> > -extern unsigned long hash_continue (const void *addr, int length,

> > -                                    unsigned long h);

> > -

> >  struct bcache

> >  {

> > +  static unsigned long default_hash (const void *ptr, int length) {

>

> Brace on the next line.


Done.

> > +    return fast_hash (ptr, length, 0);

> > +  }

>

> Can this method be private, just like `compare` is?


Done.

> > +

> >    /* Allocate a bcache.  HASH_FN and COMPARE_FN can be used to pass in

> >       custom hash, and compare functions to be used by this bcache.  If

> >       HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is

>

> This line of documentation should be updated, probably hash -> fast_hash.


Done.

> > diff --git a/gdb/utils.h b/gdb/utils.h

> > index 79c8a6fc8d..68376dac83 100644

> > --- a/gdb/utils.h

> > +++ b/gdb/utils.h

> > @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,

> >                         const gdb_byte *source, ULONGEST source_offset,

> >                         ULONGEST nbits, int bits_big_endian);

> >

> > -/* A fast hashing function.  This can be used to hash strings in a fast way

> > +/* A fast hashing function.  This can be used to hash data in a fast way

> >     when the length is known.  If no fast hashing library is available, falls

> > -   back to iterative_hash from libiberty.  */

> > +   back to iterative_hash from libiberty.  START_VALUE can be set to

> > +   continue hashing from a previous value.  */

> >

> >  static inline unsigned int

> > -fast_hash (const char* str, size_t len)

> > +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)

>

> - const void* ptr

> + const void *ptr


Done, thanks.

Will push now with those fixes.

Christian

Patch

diff --git a/gdb/bcache.c b/gdb/bcache.c
index 3f0a63be22..497efe96cb 100644
--- a/gdb/bcache.c
+++ b/gdb/bcache.c
@@ -51,31 +51,6 @@  struct bstring
   d;
 };
 
-/* The old hash function was stolen from SDBM. This is what DB 3.0
-   uses now, and is better than the old one.  */
-
-unsigned long
-hash(const void *addr, int length)
-{
-  return hash_continue (addr, length, 0);
-}
-
-/* Continue the calculation of the hash H at the given address.  */
-
-unsigned long
-hash_continue (const void *addr, int length, unsigned long h)
-{
-  const unsigned char *k, *e;
-
-  k = (const unsigned char *)addr;
-  e = k+length;
-  for (; k< e;++k)
-    {
-      h *=16777619;
-      h ^= *k;
-    }
-  return (h);
-}
 
 /* Growing the bcache's hash table.  */
 
diff --git a/gdb/bcache.h b/gdb/bcache.h
index 15dcc63440..96f6d6813f 100644
--- a/gdb/bcache.h
+++ b/gdb/bcache.h
@@ -138,13 +138,12 @@ 
 
 struct bstring;
 
-/* The hash functions */
-extern unsigned long hash (const void *addr, int length);
-extern unsigned long hash_continue (const void *addr, int length,
-                                    unsigned long h);
-
 struct bcache
 {
+  static unsigned long default_hash (const void *ptr, int length) {
+    return fast_hash (ptr, length, 0);
+  }
+
   /* Allocate a bcache.  HASH_FN and COMPARE_FN can be used to pass in
      custom hash, and compare functions to be used by this bcache.  If
      HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is
@@ -154,7 +153,7 @@  struct bcache
 					    int length) = nullptr,
 		   int (*compare_fn)(const void *, const void *,
 				     int length) = nullptr)
-    : m_hash_function (hash_fn == nullptr ? hash : hash_fn),
+    : m_hash_function (hash_fn == nullptr ? default_hash : hash_fn),
       m_compare_function (compare_fn == nullptr ? compare : compare_fn)
   {
   }
diff --git a/gdb/psymtab.c b/gdb/psymtab.c
index 7074a32956..2cbc6d4f65 100644
--- a/gdb/psymtab.c
+++ b/gdb/psymtab.c
@@ -1530,14 +1530,13 @@  psymbol_hash (const void *addr, int length)
   unsigned int domain = psymbol->domain;
   unsigned int theclass = psymbol->aclass;
 
-  h = hash_continue (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h);
-  h = hash_continue (&lang, sizeof (unsigned int), h);
-  h = hash_continue (&domain, sizeof (unsigned int), h);
-  h = hash_continue (&theclass, sizeof (unsigned int), h);
+  h = fast_hash (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h);
+  h = fast_hash (&lang, sizeof (unsigned int), h);
+  h = fast_hash (&domain, sizeof (unsigned int), h);
+  h = fast_hash (&theclass, sizeof (unsigned int), h);
   /* Note that psymbol names are interned via symbol_set_names, so
      there's no need to hash the contents of the name here.  */
-  h = hash_continue (&psymbol->ginfo.name,
-		     sizeof (psymbol->ginfo.name), h);
+  h = fast_hash (&psymbol->ginfo.name, sizeof (psymbol->ginfo.name), h);
 
   return h;
 }
diff --git a/gdb/stabsread.c b/gdb/stabsread.c
index 979df0266c..91a73dd10d 100644
--- a/gdb/stabsread.c
+++ b/gdb/stabsread.c
@@ -4778,7 +4778,7 @@  find_name_end (const char *name)
 int
 hashname (const char *name)
 {
-  return hash (name, strlen (name)) % HASHSIZE;
+  return fast_hash (name, strlen (name)) % HASHSIZE;
 }
 
 /* Initializer for this module.  */
diff --git a/gdb/utils.h b/gdb/utils.h
index 79c8a6fc8d..68376dac83 100644
--- a/gdb/utils.h
+++ b/gdb/utils.h
@@ -571,17 +571,18 @@  extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
 			  const gdb_byte *source, ULONGEST source_offset,
 			  ULONGEST nbits, int bits_big_endian);
 
-/* A fast hashing function.  This can be used to hash strings in a fast way
+/* A fast hashing function.  This can be used to hash data in a fast way
    when the length is known.  If no fast hashing library is available, falls
-   back to iterative_hash from libiberty.  */
+   back to iterative_hash from libiberty.  START_VALUE can be set to
+   continue hashing from a previous value.  */
 
 static inline unsigned int
-fast_hash (const char* str, size_t len)
+fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)
 {
 #ifdef HAVE_LIBXXHASH
-  return XXH64 (str, len, 0);
+  return XXH64 (ptr, len, start_value);
 #else
-  return iterative_hash (str, len, 0);
+  return iterative_hash (ptr, len, start_value);
 #endif
 }