[1/2] gcc_qsort: source code changes

Message ID 20180510155641.2950-2-amonakov@ispras.ru
State New
Headers show
Series
  • Introduce gcc_qsort
Related show

Commit Message

Alexander Monakov May 10, 2018, 3:56 p.m.
* sort.cc: New file.
        * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.
        * vec.c (qsort_chk): Use gcc_qsort.

---
 gcc/sort.cc  | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/system.h |   7 +-
 gcc/vec.c    |   2 +-
 3 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 gcc/sort.cc

-- 
2.13.3

Comments

David Malcolm May 10, 2018, 5:01 p.m. | #1
On Thu, 2018-05-10 at 18:56 +0300, Alexander Monakov wrote:
> 	* sort.cc: New file.

>         * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.

>         * vec.c (qsort_chk): Use gcc_qsort.


[...snip...]

I'm not a reviewer for this, but there's a lot of fiddly implementation
logic here, so maybe this code could use the selftest framework?

Maybe, in pseudo-code, something like this:

template <typename T>
static void
test_gcc_sort ()
{
   for (creation_strategy in {in-order, backwards}) // and anything else?
     for (int n = 0; n < some_limit; n++)
       {
          make_a_list_of_t (n, creation_strategy)
          gcc_sort (the_list);
          assert that the list is sorted;
	  assert that the number of calls to the callback was sane
     }
}

void
test_gcc_sort_cc ()
{
   test_gcc_sort<int, int_comparator> ();
   test_gcc_sort<long, long_comparator> ();
   // etc; maybe some custom structs to exercise the deterministic property???
}

...or some such, to quickly get coverage of the various list sizes
(which the implementation seems to rely on heavily), in a non-release
build.



Hope this is constructive
Dave
Richard Biener May 10, 2018, 5:43 p.m. | #2
On May 10, 2018 5:56:40 PM GMT+02:00, Alexander Monakov <amonakov@ispras.ru> wrote:
>	* sort.cc: New file.

>        * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.

>        * vec.c (qsort_chk): Use gcc_qsort.


Just a quick first remark - how about putting this into libiberty?  And then name it xqsort? 

Richard. 

>---

>gcc/sort.cc  | 232

>+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

> gcc/system.h |   7 +-

> gcc/vec.c    |   2 +-

> 3 files changed, 238 insertions(+), 3 deletions(-)

> create mode 100644 gcc/sort.cc

>

>diff --git a/gcc/sort.cc b/gcc/sort.cc

>new file mode 100644

>index 00000000000..4faf6d45dc6

>--- /dev/null

>+++ b/gcc/sort.cc

>@@ -0,0 +1,232 @@

>+/* Platform-independent deterministic sort function.

>+   Copyright (C) 2018 Free Software Foundation, Inc.

>+   Contributed by Alexander Monakov.

>+

>+This file is part of GCC.

>+

>+GCC is free software; you can redistribute it and/or modify it

>+under the terms of the GNU General Public License as published by the

>+Free Software Foundation; either version 3, or (at your option) any

>+later version.

>+

>+GCC is distributed in the hope that it will be useful, but WITHOUT

>+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

>+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

>+for more details.

>+

>+You should have received a copy of the GNU General Public License

>+along with GCC; see the file COPYING3.  If not see

>+<http://www.gnu.org/licenses/>.  */

>+

>+/* This implements a sort function suitable for GCC use cases:

>+   - signature-compatible to C qsort, but relaxed contract:

>+     - may apply the comparator to elements in a temporary buffer

>+     - may abort on allocation failure

>+   - deterministic (but not necessarily stable)

>+   - fast, especially for common cases (0-5 elements of size 8 or 4)

>+

>+   The implementation uses a network sort for up to 5 elements and

>+   a merge sort on top of that.  Neither stage has branches depending

>on

>+   comparator result, trading extra arithmetic for branch

>mispredictions.  */

>+

>+#ifdef GENERATOR_FILE

>+#include "bconfig.h"

>+#else

>+#include "config.h"

>+#endif

>+

>+#include "system.h"

>+

>+#define likely(cond) __builtin_expect ((cond), 1)

>+

>+#ifdef __GNUC__

>+#define noinline __attribute__ ((__noinline__))

>+#else

>+#define noinline

>+#endif

>+

>+/* C-style qsort comparator function type.  */

>+typedef int cmp_fn (const void *, const void *);

>+

>+/* Structure holding read-mostly (read-only in netsort) context. */

>+struct sort_ctx

>+{

>+  cmp_fn *cmp; // pointer to comparator

>+  char   *out; // output buffer

>+  size_t n;    // number of elements

>+  size_t size; // element size

>+};

>+

>+/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,

>+   placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */

>+static void

>+reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)

>+{

>+#define REORDER_23(SIZE, STRIDE, OFFSET)        \

>+do {                                            \

>+  size_t t0, t1;                                \

>+  memcpy (&t0, e0 + OFFSET, SIZE);              \

>+  memcpy (&t1, e1 + OFFSET, SIZE);              \

>+  char *out = c->out + OFFSET;                  \

>+  if (likely (c->n == 3))                       \

>+    memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \

>+  memcpy (out, &t0, SIZE); out += STRIDE;       \

>+  memcpy (out, &t1, SIZE);                      \

>+} while (0)

>+

>+  if (sizeof (size_t) == 8 && likely (c->size == 8))

>+    REORDER_23 (8, 8, 0);

>+  else if (likely (c->size == 4))

>+    REORDER_23 (4, 4, 0);

>+  else

>+    {

>+      size_t offset = 0, step = sizeof (size_t);

>+      for (; offset + step <= c->size; offset += step)

>+	REORDER_23 (step, c->size, offset);

>+      for (; offset < c->size; offset++)

>+	REORDER_23 (1, c->size, offset);

>+    }

>+}

>+

>+/* Like reorder23, but permute 4 or 5 elements. */

>+static void

>+reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char

>*e4)

>+{

>+#define REORDER_45(SIZE, STRIDE, OFFSET)        \

>+do {                                            \

>+  size_t t0, t1, t2, t3;                        \

>+  memcpy (&t0, e0 + OFFSET, SIZE);              \

>+  memcpy (&t1, e1 + OFFSET, SIZE);              \

>+  memcpy (&t2, e2 + OFFSET, SIZE);              \

>+  memcpy (&t3, e3 + OFFSET, SIZE);              \

>+  char *out = c->out + OFFSET;                  \

>+  if (likely (c->n == 5))                       \

>+    memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \

>+  memcpy (out, &t0, SIZE); out += STRIDE;       \

>+  memcpy (out, &t1, SIZE); out += STRIDE;       \

>+  memcpy (out, &t2, SIZE); out += STRIDE;       \

>+  memcpy (out, &t3, SIZE);                      \

>+} while (0)

>+

>+  if (sizeof (size_t) == 8 && likely (c->size == 8))

>+    REORDER_45 (8, 8, 0);

>+  else if (likely(c->size == 4))

>+    REORDER_45 (4, 4, 0);

>+  else

>+    {

>+      size_t offset = 0, step = sizeof (size_t);

>+      for (; offset + step <= c->size; offset += step)

>+	REORDER_45 (step, c->size, offset);

>+      for (; offset < c->size; offset++)

>+	REORDER_45 (1, c->size, offset);

>+    }

>+}

>+

>+/* Helper for netsort. Invoke comparator CMP on E0 and E1.

>+   Return E0^E1 if E0 compares less than E1, zero otherwise.

>+   This is noinline to avoid code growth and confine invocation

>+   to a single call site, assisting indirect branch prediction. */

>+noinline static intptr_t

>+cmp1 (char *e0, char *e1, cmp_fn *cmp)

>+{

>+  intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;

>+  return x & (cmp (e0, e1) >> 31);

>+}

>+

>+/* Execute network sort on 2 to 5 elements from IN, placing them into

>C->OUT.

>+   IN may be equal to C->OUT, in which case elements are sorted in

>place.  */

>+static void

>+netsort (char *in, sort_ctx *c)

>+{

>+#define CMP(e0, e1)                   \

>+do {                                  \

>+  intptr_t x = cmp1 (e1, e0, c->cmp); \

>+  e0 = (char *)((intptr_t)e0 ^ x);    \

>+  e1 = (char *)((intptr_t)e1 ^ x);    \

>+} while (0)

>+

>+  char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size;

>+  CMP (e0, e1);

>+  if (likely (c->n == 3))

>+    {

>+      CMP (e1, e2);

>+      CMP (e0, e1);

>+    }

>+  if (c->n <= 3)

>+    return reorder23 (c, e0, e1, e2);

>+  char *e3 = e2 + c->size, *e4 = e3 + c->size;

>+  if (likely (c->n == 5))

>+    {

>+      CMP (e3, e4);

>+      CMP (e2, e4);

>+    }

>+  CMP (e2, e3);

>+  if (likely (c->n == 5))

>+    {

>+      CMP (e0, e3);

>+      CMP (e1, e4);

>+    }

>+  CMP (e0, e2);

>+  CMP (e1, e3);

>+  CMP (e1, e2);

>+  reorder45 (c, e0, e1, e2, e3, e4);

>+}

>+

>+/* Execute merge sort on N elements from IN, placing them into OUT,

>+   using TMP as temporary storage if IN is equal to OUT.

>+   This is a stable sort if netsort is used only for 2 or 3 elements.

>*/

>+static void

>+mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)

>+{

>+  if (likely (n <= 5))

>+    {

>+      c->out = out;

>+      c->n = n;

>+      return netsort (in, c);

>+    }

>+  size_t nl = n / 2, nr = n - nl, sz = nl * c->size;

>+  char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in;

>+  /* Sort the right half, outputting to right half of OUT. */

>+  mergesort (mid, c, nr, r, tmp);

>+  /* Sort the left half, leaving left half of OUT free.  */

>+  mergesort (in, c, nl, l, mid);

>+  /* Merge sorted halves given by L, R to [OUT, END). */

>+#define MERGE_ELTSIZE(SIZE)                     \

>+do {                                            \

>+  intptr_t mr = c->cmp (r, l) >> 31;            \

>+  intptr_t lr = (intptr_t)l ^ (intptr_t)r;      \

>+  lr = (intptr_t)l ^ (lr & mr);                 \

>+  out = (char *)memcpy (out, (char *)lr, SIZE); \

>+  out += SIZE;                                  \

>+  r += mr & SIZE;                               \

>+  if (r == out) return;                         \

>+  l += ~mr & SIZE;                              \

>+} while (r != end)

>+

>+  if (likely (c->cmp(r, l + (r - out) - c->size) < 0))

>+    {

>+      char *end = out + n * c->size;

>+      if (sizeof (size_t) == 8 && likely (c->size == 8))

>+	MERGE_ELTSIZE (8);

>+      else if (likely (c->size == 4))

>+	MERGE_ELTSIZE (4);

>+      else

>+	MERGE_ELTSIZE (c->size);

>+    }

>+  memcpy (out, l, r - out);

>+}

>+

>+void

>+gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)

>+{

>+  if (n < 2)

>+    return;

>+  char *base = (char *)vbase;

>+  sort_ctx c = {cmp, base, n, size};

>+  long long scratch[32];

>+  size_t bufsz = (n / 2) * size;

>+  void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);

>+  mergesort (base, &c, n, base, (char *)buf);

>+  if (buf != scratch)

>+    free (buf);

>+}

>diff --git a/gcc/system.h b/gcc/system.h

>index 4abc321c71d..88dffccb8ab 100644

>--- a/gcc/system.h

>+++ b/gcc/system.h

>@@ -1202,11 +1202,14 @@ helper_const_non_const_cast (const char *p)

>/* qsort comparator consistency checking: except in release-checking

>compilers,

>redirect 4-argument qsort calls to qsort_chk; keep 1-argument

>invocations

>corresponding to vec::qsort (cmp): they use C qsort internally anyway. 

>*/

>-#if CHECKING_P

>+void qsort_chk (void *, size_t, size_t, int (*)(const void *, const

>void *));

>+void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const

>void *));

> #define PP_5th(a1, a2, a3, a4, a5, ...) a5

> #undef qsort

>+#if CHECKING_P

>#define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0)

>(__VA_ARGS__)

>-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const

>void *));

>+#else

>+#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0)

>(__VA_ARGS__)

> #endif

> 

> #endif /* ! GCC_SYSTEM_H */

>diff --git a/gcc/vec.c b/gcc/vec.c

>index 11924a80a2d..2941715a34a 100644

>--- a/gcc/vec.c

>+++ b/gcc/vec.c

>@@ -215,7 +215,7 @@ void

> qsort_chk (void *base, size_t n, size_t size,

> 	   int (*cmp)(const void *, const void *))

> {

>-  (qsort) (base, n, size, cmp);

>+  gcc_qsort (base, n, size, cmp);

> #if 0

> #define LIM(n) (n)

> #else
Alexander Monakov May 10, 2018, 6:07 p.m. | #3
On Thu, 10 May 2018, Richard Biener wrote:
> 

> Just a quick first remark - how about putting this into libiberty?  And then name it xqsort? 


I'm not sure.  It has a weaker contract compared to qsort, and I believe
functions in libiberty are understood to provide stronger/better replacements.

Alexander
Richard Biener May 11, 2018, noon | #4
On Thu, May 10, 2018 at 5:56 PM, Alexander Monakov <amonakov@ispras.ru> wrote:
>         * sort.cc: New file.

>         * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.

>         * vec.c (qsort_chk): Use gcc_qsort.


Looks good to me.  As an additional enhancement we might want to provide
(even unconditionally?) the glibc qsort_r() interface.  I remember adding
various globals to pass down state to the comparator...

I agree self-tests might be good to have.  Also it looks like the
qsort-checking may now be somehow embedded within our qsort implementation?

But all these things can be done as followup I think.

Thanks,
Richard.

> ---

>  gcc/sort.cc  | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

>  gcc/system.h |   7 +-

>  gcc/vec.c    |   2 +-

>  3 files changed, 238 insertions(+), 3 deletions(-)

>  create mode 100644 gcc/sort.cc

>

> diff --git a/gcc/sort.cc b/gcc/sort.cc

> new file mode 100644

> index 00000000000..4faf6d45dc6

> --- /dev/null

> +++ b/gcc/sort.cc

> @@ -0,0 +1,232 @@

> +/* Platform-independent deterministic sort function.

> +   Copyright (C) 2018 Free Software Foundation, Inc.

> +   Contributed by Alexander Monakov.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it

> +under the terms of the GNU General Public License as published by the

> +Free Software Foundation; either version 3, or (at your option) any

> +later version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT

> +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +/* This implements a sort function suitable for GCC use cases:

> +   - signature-compatible to C qsort, but relaxed contract:

> +     - may apply the comparator to elements in a temporary buffer

> +     - may abort on allocation failure

> +   - deterministic (but not necessarily stable)

> +   - fast, especially for common cases (0-5 elements of size 8 or 4)

> +

> +   The implementation uses a network sort for up to 5 elements and

> +   a merge sort on top of that.  Neither stage has branches depending on

> +   comparator result, trading extra arithmetic for branch mispredictions.  */

> +

> +#ifdef GENERATOR_FILE

> +#include "bconfig.h"

> +#else

> +#include "config.h"

> +#endif

> +

> +#include "system.h"

> +

> +#define likely(cond) __builtin_expect ((cond), 1)

> +

> +#ifdef __GNUC__

> +#define noinline __attribute__ ((__noinline__))

> +#else

> +#define noinline

> +#endif

> +

> +/* C-style qsort comparator function type.  */

> +typedef int cmp_fn (const void *, const void *);

> +

> +/* Structure holding read-mostly (read-only in netsort) context. */

> +struct sort_ctx

> +{

> +  cmp_fn *cmp; // pointer to comparator

> +  char   *out; // output buffer

> +  size_t n;    // number of elements

> +  size_t size; // element size

> +};

> +

> +/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,

> +   placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */

> +static void

> +reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)

> +{

> +#define REORDER_23(SIZE, STRIDE, OFFSET)        \

> +do {                                            \

> +  size_t t0, t1;                                \

> +  memcpy (&t0, e0 + OFFSET, SIZE);              \

> +  memcpy (&t1, e1 + OFFSET, SIZE);              \

> +  char *out = c->out + OFFSET;                  \

> +  if (likely (c->n == 3))                       \

> +    memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \

> +  memcpy (out, &t0, SIZE); out += STRIDE;       \

> +  memcpy (out, &t1, SIZE);                      \

> +} while (0)

> +

> +  if (sizeof (size_t) == 8 && likely (c->size == 8))

> +    REORDER_23 (8, 8, 0);

> +  else if (likely (c->size == 4))

> +    REORDER_23 (4, 4, 0);

> +  else

> +    {

> +      size_t offset = 0, step = sizeof (size_t);

> +      for (; offset + step <= c->size; offset += step)

> +       REORDER_23 (step, c->size, offset);

> +      for (; offset < c->size; offset++)

> +       REORDER_23 (1, c->size, offset);

> +    }

> +}

> +

> +/* Like reorder23, but permute 4 or 5 elements. */

> +static void

> +reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4)

> +{

> +#define REORDER_45(SIZE, STRIDE, OFFSET)        \

> +do {                                            \

> +  size_t t0, t1, t2, t3;                        \

> +  memcpy (&t0, e0 + OFFSET, SIZE);              \

> +  memcpy (&t1, e1 + OFFSET, SIZE);              \

> +  memcpy (&t2, e2 + OFFSET, SIZE);              \

> +  memcpy (&t3, e3 + OFFSET, SIZE);              \

> +  char *out = c->out + OFFSET;                  \

> +  if (likely (c->n == 5))                       \

> +    memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \

> +  memcpy (out, &t0, SIZE); out += STRIDE;       \

> +  memcpy (out, &t1, SIZE); out += STRIDE;       \

> +  memcpy (out, &t2, SIZE); out += STRIDE;       \

> +  memcpy (out, &t3, SIZE);                      \

> +} while (0)

> +

> +  if (sizeof (size_t) == 8 && likely (c->size == 8))

> +    REORDER_45 (8, 8, 0);

> +  else if (likely(c->size == 4))

> +    REORDER_45 (4, 4, 0);

> +  else

> +    {

> +      size_t offset = 0, step = sizeof (size_t);

> +      for (; offset + step <= c->size; offset += step)

> +       REORDER_45 (step, c->size, offset);

> +      for (; offset < c->size; offset++)

> +       REORDER_45 (1, c->size, offset);

> +    }

> +}

> +

> +/* Helper for netsort. Invoke comparator CMP on E0 and E1.

> +   Return E0^E1 if E0 compares less than E1, zero otherwise.

> +   This is noinline to avoid code growth and confine invocation

> +   to a single call site, assisting indirect branch prediction. */

> +noinline static intptr_t

> +cmp1 (char *e0, char *e1, cmp_fn *cmp)

> +{

> +  intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;

> +  return x & (cmp (e0, e1) >> 31);

> +}

> +

> +/* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.

> +   IN may be equal to C->OUT, in which case elements are sorted in place.  */

> +static void

> +netsort (char *in, sort_ctx *c)

> +{

> +#define CMP(e0, e1)                   \

> +do {                                  \

> +  intptr_t x = cmp1 (e1, e0, c->cmp); \

> +  e0 = (char *)((intptr_t)e0 ^ x);    \

> +  e1 = (char *)((intptr_t)e1 ^ x);    \

> +} while (0)

> +

> +  char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size;

> +  CMP (e0, e1);

> +  if (likely (c->n == 3))

> +    {

> +      CMP (e1, e2);

> +      CMP (e0, e1);

> +    }

> +  if (c->n <= 3)

> +    return reorder23 (c, e0, e1, e2);

> +  char *e3 = e2 + c->size, *e4 = e3 + c->size;

> +  if (likely (c->n == 5))

> +    {

> +      CMP (e3, e4);

> +      CMP (e2, e4);

> +    }

> +  CMP (e2, e3);

> +  if (likely (c->n == 5))

> +    {

> +      CMP (e0, e3);

> +      CMP (e1, e4);

> +    }

> +  CMP (e0, e2);

> +  CMP (e1, e3);

> +  CMP (e1, e2);

> +  reorder45 (c, e0, e1, e2, e3, e4);

> +}

> +

> +/* Execute merge sort on N elements from IN, placing them into OUT,

> +   using TMP as temporary storage if IN is equal to OUT.

> +   This is a stable sort if netsort is used only for 2 or 3 elements. */

> +static void

> +mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)

> +{

> +  if (likely (n <= 5))

> +    {

> +      c->out = out;

> +      c->n = n;

> +      return netsort (in, c);

> +    }

> +  size_t nl = n / 2, nr = n - nl, sz = nl * c->size;

> +  char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in;

> +  /* Sort the right half, outputting to right half of OUT. */

> +  mergesort (mid, c, nr, r, tmp);

> +  /* Sort the left half, leaving left half of OUT free.  */

> +  mergesort (in, c, nl, l, mid);

> +  /* Merge sorted halves given by L, R to [OUT, END). */

> +#define MERGE_ELTSIZE(SIZE)                     \

> +do {                                            \

> +  intptr_t mr = c->cmp (r, l) >> 31;            \

> +  intptr_t lr = (intptr_t)l ^ (intptr_t)r;      \

> +  lr = (intptr_t)l ^ (lr & mr);                 \

> +  out = (char *)memcpy (out, (char *)lr, SIZE); \

> +  out += SIZE;                                  \

> +  r += mr & SIZE;                               \

> +  if (r == out) return;                         \

> +  l += ~mr & SIZE;                              \

> +} while (r != end)

> +

> +  if (likely (c->cmp(r, l + (r - out) - c->size) < 0))

> +    {

> +      char *end = out + n * c->size;

> +      if (sizeof (size_t) == 8 && likely (c->size == 8))

> +       MERGE_ELTSIZE (8);

> +      else if (likely (c->size == 4))

> +       MERGE_ELTSIZE (4);

> +      else

> +       MERGE_ELTSIZE (c->size);

> +    }

> +  memcpy (out, l, r - out);

> +}

> +

> +void

> +gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)

> +{

> +  if (n < 2)

> +    return;

> +  char *base = (char *)vbase;

> +  sort_ctx c = {cmp, base, n, size};

> +  long long scratch[32];

> +  size_t bufsz = (n / 2) * size;

> +  void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);

> +  mergesort (base, &c, n, base, (char *)buf);

> +  if (buf != scratch)

> +    free (buf);

> +}

> diff --git a/gcc/system.h b/gcc/system.h

> index 4abc321c71d..88dffccb8ab 100644

> --- a/gcc/system.h

> +++ b/gcc/system.h

> @@ -1202,11 +1202,14 @@ helper_const_non_const_cast (const char *p)

>  /* qsort comparator consistency checking: except in release-checking compilers,

>     redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations

>     corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */

> -#if CHECKING_P

> +void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));

> +void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));

>  #define PP_5th(a1, a2, a3, a4, a5, ...) a5

>  #undef qsort

> +#if CHECKING_P

>  #define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) (__VA_ARGS__)

> -void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));

> +#else

> +#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)

>  #endif

>

>  #endif /* ! GCC_SYSTEM_H */

> diff --git a/gcc/vec.c b/gcc/vec.c

> index 11924a80a2d..2941715a34a 100644

> --- a/gcc/vec.c

> +++ b/gcc/vec.c

> @@ -215,7 +215,7 @@ void

>  qsort_chk (void *base, size_t n, size_t size,

>            int (*cmp)(const void *, const void *))

>  {

> -  (qsort) (base, n, size, cmp);

> +  gcc_qsort (base, n, size, cmp);

>  #if 0

>  #define LIM(n) (n)

>  #else

> --

> 2.13.3

>
Alexander Monakov May 11, 2018, 1:08 p.m. | #5
On Fri, 11 May 2018, Richard Biener wrote:
> Looks good to me.  As additional enhancement we might want to provide

> (even unconditionally?)

> the glibc qsort_r() interface.  I remember adding various globals to

> pass down state to the comparator...


Thanks. I have no plans w.r.t. qsort_r, but OTOH a stable sort interface
can be added with tiny size/speed cost, and the sole in-tree use can be
converted :)

> I agree self-tests might be good to have.  Also it looks like the

> qsort-checking may now be somehow

> embedded within our qsort implementation?


I gave self-tests some thought after David's mail, and honestly I don't
see much value in that, given that we run qsort_chk on everything.

As for embedding, I don't think that's necessary. I prefer to keep them
separate.

Alexander
H.J. Lu May 13, 2018, 11:14 p.m. | #6
On Thu, May 10, 2018 at 8:56 AM, Alexander Monakov <amonakov@ispras.ru> wrote:
>         * sort.cc: New file.

>         * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.

>         * vec.c (qsort_chk): Use gcc_qsort.

>


This breaks bootstrap on Fedora 28/i686:

https://gcc.gnu.org/ml/gcc-regression/2018-05/msg00088.html

../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:100:10: error: ‘void* memcpy(void*, const
void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of
object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’}
[-Werror=array-bounds]
   memcpy (&t2, e2 + OFFSET, SIZE);              \
   ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:97:18: note: ‘t2’ declared here
   size_t t0, t1, t2, t3;                        \
                  ^~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:101:10: error: ‘void* memcpy(void*, const
void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of
object ‘t3’ with type ‘size_t’ {aka ‘unsigned int’}
[-Werror=array-bounds]
   memcpy (&t3, e3 + OFFSET, SIZE);              \
   ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:97:22: note: ‘t3’ declared here
   size_t t0, t1, t2, t3;                        \
                      ^~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:105:10: error: ‘void* memcpy(void*, const
void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of
object ‘t0’ with type ‘size_t’ {aka ‘unsigned int’}
[-Werror=array-bounds]
   memcpy (out, &t0, SIZE); out += STRIDE;       \
   ~~~~~~~^~~~~~~~~~~~~~~~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:97:10: note: ‘t0’ declared here
   size_t t0, t1, t2, t3;                        \
          ^~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:106:10: error: ‘void* memcpy(void*, const
void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of
object ‘t1’ with type ‘size_t’ {aka ‘unsigned int’}
[-Werror=array-bounds]
   memcpy (out, &t1, SIZE); out += STRIDE;       \
   ~~~~~~~^~~~~~~~~~~~~~~~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:97:14: note: ‘t1’ declared here
   size_t t0, t1, t2, t3;                        \
              ^~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:107:10: error: ‘void* memcpy(void*, const
void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of
object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’}
[-Werror=array-bounds]
   memcpy (out, &t2, SIZE); out += STRIDE;       \
   ~~~~~~~^~~~~~~~~~~~~~~~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:97:18: note: ‘t2’ declared here
   size_t t0, t1, t2, t3;                        \
                  ^~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:108:10: error: ‘void* memcpy(void*, const
void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of
object ‘t3’ with type ‘size_t’ {aka ‘unsigned int’}
[-Werror=array-bounds]
   memcpy (out, &t3, SIZE);                      \
   ~~~~~~~^~~~~~~~~~~~~~~~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
../../src-trunk/gcc/sort.cc:97:22: note: ‘t3’ declared here
   size_t t0, t1, t2, t3;                        \
                      ^~
../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’
     REORDER_45 (8, 8, 0);
     ^~~~~~~~~~
Alexander Monakov May 14, 2018, 6:37 a.m. | #7
On Sun, 13 May 2018, H.J. Lu wrote:
> This breaks bootstrap on Fedora 28/i686:

> 

> https://gcc.gnu.org/ml/gcc-regression/2018-05/msg00088.html

> 

> ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’

>      REORDER_45 (8, 8, 0);

>      ^~~~~~~~~~

> ../../src-trunk/gcc/sort.cc:100:10: error: ‘void* memcpy(void*, const

> void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of

> object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’}

> [-Werror=array-bounds]

>    memcpy (&t2, e2 + OFFSET, SIZE);              \

>    ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~


Hm, on 32-bit this is trivially dead code, I wonder why we issue the warning?

In any case, due to PR 85757 it's desirable to use types with sizes matching
the memcpy size; is the following OK to apply? Bootstrapped on 32-bit x86.

	* sort.cc (REORDER_23): Pass the type for the temporaries instead of
        intended memcpy size.
        (REORDER_45): Likewise.
---
 gcc/sort.cc | 72 ++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/gcc/sort.cc b/gcc/sort.cc
index 4faf6d45dc6..c41683c91dd 100644
--- a/gcc/sort.cc
+++ b/gcc/sort.cc
@@ -62,29 +62,29 @@ struct sort_ctx
 static void
 reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)
 {
-#define REORDER_23(SIZE, STRIDE, OFFSET)        \
-do {                                            \
-  size_t t0, t1;                                \
-  memcpy (&t0, e0 + OFFSET, SIZE);              \
-  memcpy (&t1, e1 + OFFSET, SIZE);              \
-  char *out = c->out + OFFSET;                  \
-  if (likely (c->n == 3))                       \
-    memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \
-  memcpy (out, &t0, SIZE); out += STRIDE;       \
-  memcpy (out, &t1, SIZE);                      \
+#define REORDER_23(TYPE, STRIDE, OFFSET)                 \
+do {                                                     \
+  TYPE t0, t1;                                           \
+  memcpy (&t0, e0 + OFFSET, sizeof (TYPE));              \
+  memcpy (&t1, e1 + OFFSET, sizeof (TYPE));              \
+  char *out = c->out + OFFSET;                           \
+  if (likely (c->n == 3))                                \
+    memcpy (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE)); \
+  memcpy (out, &t0, sizeof (TYPE)); out += STRIDE;       \
+  memcpy (out, &t1, sizeof (TYPE));                      \
 } while (0)
 
-  if (sizeof (size_t) == 8 && likely (c->size == 8))
-    REORDER_23 (8, 8, 0);
-  else if (likely (c->size == 4))
-    REORDER_23 (4, 4, 0);
+  if (likely (c->size == sizeof (size_t)))
+    REORDER_23 (size_t, sizeof (size_t), 0);
+  else if (likely (c->size == sizeof (int)))
+    REORDER_23 (int, sizeof (int), 0);
   else
     {
       size_t offset = 0, step = sizeof (size_t);
       for (; offset + step <= c->size; offset += step)
-	REORDER_23 (step, c->size, offset);
+	REORDER_23 (size_t, c->size, offset);
       for (; offset < c->size; offset++)
-	REORDER_23 (1, c->size, offset);
+	REORDER_23 (char, c->size, offset);
     }
 }
 
@@ -92,33 +92,33 @@ do {                                            \
 static void
 reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4)
 {
-#define REORDER_45(SIZE, STRIDE, OFFSET)        \
-do {                                            \
-  size_t t0, t1, t2, t3;                        \
-  memcpy (&t0, e0 + OFFSET, SIZE);              \
-  memcpy (&t1, e1 + OFFSET, SIZE);              \
-  memcpy (&t2, e2 + OFFSET, SIZE);              \
-  memcpy (&t3, e3 + OFFSET, SIZE);              \
-  char *out = c->out + OFFSET;                  \
-  if (likely (c->n == 5))                       \
-    memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \
-  memcpy (out, &t0, SIZE); out += STRIDE;       \
-  memcpy (out, &t1, SIZE); out += STRIDE;       \
-  memcpy (out, &t2, SIZE); out += STRIDE;       \
-  memcpy (out, &t3, SIZE);                      \
+#define REORDER_45(TYPE, STRIDE, OFFSET)                 \
+do {                                                     \
+  TYPE t0, t1, t2, t3;                                   \
+  memcpy (&t0, e0 + OFFSET, sizeof (TYPE));              \
+  memcpy (&t1, e1 + OFFSET, sizeof (TYPE));              \
+  memcpy (&t2, e2 + OFFSET, sizeof (TYPE));              \
+  memcpy (&t3, e3 + OFFSET, sizeof (TYPE));              \
+  char *out = c->out + OFFSET;                           \
+  if (likely (c->n == 5))                                \
+    memcpy (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE)); \
+  memcpy (out, &t0, sizeof (TYPE)); out += STRIDE;       \
+  memcpy (out, &t1, sizeof (TYPE)); out += STRIDE;       \
+  memcpy (out, &t2, sizeof (TYPE)); out += STRIDE;       \
+  memcpy (out, &t3, sizeof (TYPE));                      \
 } while (0)
 
-  if (sizeof (size_t) == 8 && likely (c->size == 8))
-    REORDER_45 (8, 8, 0);
-  else if (likely(c->size == 4))
-    REORDER_45 (4, 4, 0);
+  if (likely (c->size == sizeof (size_t)))
+    REORDER_45 (size_t, sizeof (size_t), 0);
+  else if (likely(c->size == sizeof (int)))
+    REORDER_45 (int,  sizeof (int), 0);
   else
     {
       size_t offset = 0, step = sizeof (size_t);
       for (; offset + step <= c->size; offset += step)
-	REORDER_45 (step, c->size, offset);
+	REORDER_45 (size_t, c->size, offset);
       for (; offset < c->size; offset++)
-	REORDER_45 (1, c->size, offset);
+	REORDER_45 (char, c->size, offset);
     }
 }
 
-- 
2.13.3
Richard Biener May 14, 2018, 8:44 a.m. | #8
On Mon, May 14, 2018 at 8:37 AM, Alexander Monakov <amonakov@ispras.ru> wrote:
> On Sun, 13 May 2018, H.J. Lu wrote:

>> This breaks bootstrap on Fedora 28/i686:

>>

>> https://gcc.gnu.org/ml/gcc-regression/2018-05/msg00088.html

>>

>> ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’

>>      REORDER_45 (8, 8, 0);

>>      ^~~~~~~~~~

>> ../../src-trunk/gcc/sort.cc:100:10: error: ‘void* memcpy(void*, const

>> void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of

>> object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’}

>> [-Werror=array-bounds]

>>    memcpy (&t2, e2 + OFFSET, SIZE);              \

>>    ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~

>

> Hm, on 32-bit this is trivially dead code, I wonder why we issue the warning?

>

> In any case, due to PR 85757 it's desirable to use types with sizes matching

> the memcpy size; is the following OK to apply? Bootstrapped on 32-bit x86.


OK.

Richard.

>         * sort.cc (REORDER_23): Pass the type for the temporaries instead of

>         intended memcpy size.

>         (REORDER_45): Likewise.

> ---

>  gcc/sort.cc | 72 ++++++++++++++++++++++++++++++-------------------------------

>  1 file changed, 36 insertions(+), 36 deletions(-)

>

> diff --git a/gcc/sort.cc b/gcc/sort.cc

> index 4faf6d45dc6..c41683c91dd 100644

> --- a/gcc/sort.cc

> +++ b/gcc/sort.cc

> @@ -62,29 +62,29 @@ struct sort_ctx

>  static void

>  reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)

>  {

> -#define REORDER_23(SIZE, STRIDE, OFFSET)        \

> -do {                                            \

> -  size_t t0, t1;                                \

> -  memcpy (&t0, e0 + OFFSET, SIZE);              \

> -  memcpy (&t1, e1 + OFFSET, SIZE);              \

> -  char *out = c->out + OFFSET;                  \

> -  if (likely (c->n == 3))                       \

> -    memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \

> -  memcpy (out, &t0, SIZE); out += STRIDE;       \

> -  memcpy (out, &t1, SIZE);                      \

> +#define REORDER_23(TYPE, STRIDE, OFFSET)                 \

> +do {                                                     \

> +  TYPE t0, t1;                                           \

> +  memcpy (&t0, e0 + OFFSET, sizeof (TYPE));              \

> +  memcpy (&t1, e1 + OFFSET, sizeof (TYPE));              \

> +  char *out = c->out + OFFSET;                           \

> +  if (likely (c->n == 3))                                \

> +    memcpy (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE)); \

> +  memcpy (out, &t0, sizeof (TYPE)); out += STRIDE;       \

> +  memcpy (out, &t1, sizeof (TYPE));                      \

>  } while (0)

>

> -  if (sizeof (size_t) == 8 && likely (c->size == 8))

> -    REORDER_23 (8, 8, 0);

> -  else if (likely (c->size == 4))

> -    REORDER_23 (4, 4, 0);

> +  if (likely (c->size == sizeof (size_t)))

> +    REORDER_23 (size_t, sizeof (size_t), 0);

> +  else if (likely (c->size == sizeof (int)))

> +    REORDER_23 (int, sizeof (int), 0);

>    else

>      {

>        size_t offset = 0, step = sizeof (size_t);

>        for (; offset + step <= c->size; offset += step)

> -       REORDER_23 (step, c->size, offset);

> +       REORDER_23 (size_t, c->size, offset);

>        for (; offset < c->size; offset++)

> -       REORDER_23 (1, c->size, offset);

> +       REORDER_23 (char, c->size, offset);

>      }

>  }

>

> @@ -92,33 +92,33 @@ do {                                            \

>  static void

>  reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4)

>  {

> -#define REORDER_45(SIZE, STRIDE, OFFSET)        \

> -do {                                            \

> -  size_t t0, t1, t2, t3;                        \

> -  memcpy (&t0, e0 + OFFSET, SIZE);              \

> -  memcpy (&t1, e1 + OFFSET, SIZE);              \

> -  memcpy (&t2, e2 + OFFSET, SIZE);              \

> -  memcpy (&t3, e3 + OFFSET, SIZE);              \

> -  char *out = c->out + OFFSET;                  \

> -  if (likely (c->n == 5))                       \

> -    memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \

> -  memcpy (out, &t0, SIZE); out += STRIDE;       \

> -  memcpy (out, &t1, SIZE); out += STRIDE;       \

> -  memcpy (out, &t2, SIZE); out += STRIDE;       \

> -  memcpy (out, &t3, SIZE);                      \

> +#define REORDER_45(TYPE, STRIDE, OFFSET)                 \

> +do {                                                     \

> +  TYPE t0, t1, t2, t3;                                   \

> +  memcpy (&t0, e0 + OFFSET, sizeof (TYPE));              \

> +  memcpy (&t1, e1 + OFFSET, sizeof (TYPE));              \

> +  memcpy (&t2, e2 + OFFSET, sizeof (TYPE));              \

> +  memcpy (&t3, e3 + OFFSET, sizeof (TYPE));              \

> +  char *out = c->out + OFFSET;                           \

> +  if (likely (c->n == 5))                                \

> +    memcpy (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE)); \

> +  memcpy (out, &t0, sizeof (TYPE)); out += STRIDE;       \

> +  memcpy (out, &t1, sizeof (TYPE)); out += STRIDE;       \

> +  memcpy (out, &t2, sizeof (TYPE)); out += STRIDE;       \

> +  memcpy (out, &t3, sizeof (TYPE));                      \

>  } while (0)

>

> -  if (sizeof (size_t) == 8 && likely (c->size == 8))

> -    REORDER_45 (8, 8, 0);

> -  else if (likely(c->size == 4))

> -    REORDER_45 (4, 4, 0);

> +  if (likely (c->size == sizeof (size_t)))

> +    REORDER_45 (size_t, sizeof (size_t), 0);

> +  else if (likely(c->size == sizeof (int)))

> +    REORDER_45 (int,  sizeof (int), 0);

>    else

>      {

>        size_t offset = 0, step = sizeof (size_t);

>        for (; offset + step <= c->size; offset += step)

> -       REORDER_45 (step, c->size, offset);

> +       REORDER_45 (size_t, c->size, offset);

>        for (; offset < c->size; offset++)

> -       REORDER_45 (1, c->size, offset);

> +       REORDER_45 (char, c->size, offset);

>      }

>  }

>

> --

> 2.13.3

Patch

diff --git a/gcc/sort.cc b/gcc/sort.cc
new file mode 100644
index 00000000000..4faf6d45dc6
--- /dev/null
+++ b/gcc/sort.cc
@@ -0,0 +1,232 @@ 
+/* Platform-independent deterministic sort function.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Alexander Monakov.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This implements a sort function suitable for GCC use cases:
+   - signature-compatible with C qsort, but with a relaxed contract:
+     - may apply the comparator to elements in a temporary buffer
+     - may abort on allocation failure
+   - deterministic (but not necessarily stable)
+   - fast, especially for common cases (0-5 elements of size 8 or 4)
+
+   The implementation uses a network sort for up to 5 elements and
+   a merge sort on top of that.  Neither stage has branches depending on
+   comparator result, trading extra arithmetic for fewer branch mispredictions.  */
+
+#ifdef GENERATOR_FILE
+#include "bconfig.h"
+#else
+#include "config.h"
+#endif
+
+#include "system.h"
+
+#define likely(cond) __builtin_expect ((cond), 1)
+
+#ifdef __GNUC__
+#define noinline __attribute__ ((__noinline__))
+#else
+#define noinline
+#endif
+
+/* C-style qsort comparator function type.  */
+typedef int cmp_fn (const void *, const void *);
+
+/* Structure holding read-mostly (read-only in netsort) context. */
+struct sort_ctx
+{
+  cmp_fn *cmp; // pointer to comparator
+  char   *out; // output buffer
+  size_t n;    // number of elements
+  size_t size; // element size
+};
+
+/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
+   placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */
+static void
+reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)
+{
+#define REORDER_23(SIZE, STRIDE, OFFSET)        \
+do {                                            \
+  size_t t0, t1;                                \
+  memcpy (&t0, e0 + OFFSET, SIZE);              \
+  memcpy (&t1, e1 + OFFSET, SIZE);              \
+  char *out = c->out + OFFSET;                  \
+  if (likely (c->n == 3))                       \
+    memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \
+  memcpy (out, &t0, SIZE); out += STRIDE;       \
+  memcpy (out, &t1, SIZE);                      \
+} while (0)
+
+  if (sizeof (size_t) == 8 && likely (c->size == 8))
+    REORDER_23 (8, 8, 0);
+  else if (likely (c->size == 4))
+    REORDER_23 (4, 4, 0);
+  else
+    {
+      size_t offset = 0, step = sizeof (size_t);
+      for (; offset + step <= c->size; offset += step)
+	REORDER_23 (step, c->size, offset);
+      for (; offset < c->size; offset++)
+	REORDER_23 (1, c->size, offset);
+    }
+}
+
+/* Like reorder23, but permute 4 or 5 elements. */
+static void
+reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4)
+{
+#define REORDER_45(SIZE, STRIDE, OFFSET)        \
+do {                                            \
+  size_t t0, t1, t2, t3;                        \
+  memcpy (&t0, e0 + OFFSET, SIZE);              \
+  memcpy (&t1, e1 + OFFSET, SIZE);              \
+  memcpy (&t2, e2 + OFFSET, SIZE);              \
+  memcpy (&t3, e3 + OFFSET, SIZE);              \
+  char *out = c->out + OFFSET;                  \
+  if (likely (c->n == 5))                       \
+    memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \
+  memcpy (out, &t0, SIZE); out += STRIDE;       \
+  memcpy (out, &t1, SIZE); out += STRIDE;       \
+  memcpy (out, &t2, SIZE); out += STRIDE;       \
+  memcpy (out, &t3, SIZE);                      \
+} while (0)
+
+  if (sizeof (size_t) == 8 && likely (c->size == 8))
+    REORDER_45 (8, 8, 0);
+  else if (likely(c->size == 4))
+    REORDER_45 (4, 4, 0);
+  else
+    {
+      size_t offset = 0, step = sizeof (size_t);
+      for (; offset + step <= c->size; offset += step)
+	REORDER_45 (step, c->size, offset);
+      for (; offset < c->size; offset++)
+	REORDER_45 (1, c->size, offset);
+    }
+}
+
+/* Helper for netsort. Invoke comparator CMP on E0 and E1.
+   Return E0^E1 if E0 compares less than E1, zero otherwise.
+   This is noinline to avoid code growth and confine invocation
+   to a single call site, assisting indirect branch prediction. */
+noinline static intptr_t
+cmp1 (char *e0, char *e1, cmp_fn *cmp)
+{
+  intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;
+  return x & (cmp (e0, e1) >> 31);
+}
+
+/* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.
+   IN may be equal to C->OUT, in which case elements are sorted in place.  */
+static void
+netsort (char *in, sort_ctx *c)
+{
+#define CMP(e0, e1)                   \
+do {                                  \
+  intptr_t x = cmp1 (e1, e0, c->cmp); \
+  e0 = (char *)((intptr_t)e0 ^ x);    \
+  e1 = (char *)((intptr_t)e1 ^ x);    \
+} while (0)
+
+  char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size;
+  CMP (e0, e1);
+  if (likely (c->n == 3))
+    {
+      CMP (e1, e2);
+      CMP (e0, e1);
+    }
+  if (c->n <= 3)
+    return reorder23 (c, e0, e1, e2);
+  char *e3 = e2 + c->size, *e4 = e3 + c->size;
+  if (likely (c->n == 5))
+    {
+      CMP (e3, e4);
+      CMP (e2, e4);
+    }
+  CMP (e2, e3);
+  if (likely (c->n == 5))
+    {
+      CMP (e0, e3);
+      CMP (e1, e4);
+    }
+  CMP (e0, e2);
+  CMP (e1, e3);
+  CMP (e1, e2);
+  reorder45 (c, e0, e1, e2, e3, e4);
+}
+
+/* Execute merge sort on N elements from IN, placing them into OUT,
+   using TMP as temporary storage if IN is equal to OUT.
+   This is a stable sort if netsort is used only for 2 or 3 elements. */
+static void
+mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp)
+{
+  if (likely (n <= 5))
+    {
+      c->out = out;
+      c->n = n;
+      return netsort (in, c);
+    }
+  size_t nl = n / 2, nr = n - nl, sz = nl * c->size;
+  char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in;
+  /* Sort the right half, outputting to right half of OUT. */
+  mergesort (mid, c, nr, r, tmp);
+  /* Sort the left half, leaving left half of OUT free.  */
+  mergesort (in, c, nl, l, mid);
+  /* Merge sorted halves given by L, R to [OUT, END). */
+#define MERGE_ELTSIZE(SIZE)                     \
+do {                                            \
+  intptr_t mr = c->cmp (r, l) >> 31;            \
+  intptr_t lr = (intptr_t)l ^ (intptr_t)r;      \
+  lr = (intptr_t)l ^ (lr & mr);                 \
+  out = (char *)memcpy (out, (char *)lr, SIZE); \
+  out += SIZE;                                  \
+  r += mr & SIZE;                               \
+  if (r == out) return;                         \
+  l += ~mr & SIZE;                              \
+} while (r != end)
+
+  if (likely (c->cmp(r, l + (r - out) - c->size) < 0))
+    {
+      char *end = out + n * c->size;
+      if (sizeof (size_t) == 8 && likely (c->size == 8))
+	MERGE_ELTSIZE (8);
+      else if (likely (c->size == 4))
+	MERGE_ELTSIZE (4);
+      else
+	MERGE_ELTSIZE (c->size);
+    }
+  memcpy (out, l, r - out);
+}
+
+void
+gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+{
+  if (n < 2)
+    return;
+  char *base = (char *)vbase;
+  sort_ctx c = {cmp, base, n, size};
+  long long scratch[32];
+  size_t bufsz = (n / 2) * size;
+  void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
+  mergesort (base, &c, n, base, (char *)buf);
+  if (buf != scratch)
+    free (buf);
+}
diff --git a/gcc/system.h b/gcc/system.h
index 4abc321c71d..88dffccb8ab 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1202,11 +1202,14 @@  helper_const_non_const_cast (const char *p)
 /* qsort comparator consistency checking: except in release-checking compilers,
    redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations
    corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */
-#if CHECKING_P
+void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
+void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));
 #define PP_5th(a1, a2, a3, a4, a5, ...) a5
 #undef qsort
+#if CHECKING_P
 #define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) (__VA_ARGS__)
-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
+#else
+#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)
 #endif
 
 #endif /* ! GCC_SYSTEM_H */
diff --git a/gcc/vec.c b/gcc/vec.c
index 11924a80a2d..2941715a34a 100644
--- a/gcc/vec.c
+++ b/gcc/vec.c
@@ -215,7 +215,7 @@  void
 qsort_chk (void *base, size_t n, size_t size,
 	   int (*cmp)(const void *, const void *))
 {
-  (qsort) (base, n, size, cmp);
+  gcc_qsort (base, n, size, cmp);
 #if 0
 #define LIM(n) (n)
 #else