[v3,4/7] stdlib: Move insertion sort out qsort

Message ID 20210903171144.952737-5-adhemerval.zanella@linaro.org
State New
Headers show
Series
  • Use introsort for qsort
Related show

Commit Message

Adhemerval Zanella via Libc-alpha Sept. 3, 2021, 5:11 p.m.
---
 stdlib/qsort.c | 100 ++++++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 47 deletions(-)

-- 
2.30.2

Comments

Fangrui Song via Libc-alpha Sept. 6, 2021, 8:35 p.m. | #1
On 2021-09-03, Adhemerval Zanella via Libc-alpha wrote:
>---

> stdlib/qsort.c | 100 ++++++++++++++++++++++++++-----------------------

> 1 file changed, 53 insertions(+), 47 deletions(-)

>

>diff --git a/stdlib/qsort.c b/stdlib/qsort.c

>index 59458d151b..b69417dedd 100644

>--- a/stdlib/qsort.c

>+++ b/stdlib/qsort.c

>@@ -150,6 +150,58 @@ typedef struct

>       smaller partition.  This *guarantees* no more than log (total_elems)

>       stack size is needed (actually O(1) in this case)!  */

>

>+static void

>+insertion_sort (void *const pbase, size_t total_elems, size_t size,

>+                swap_func_t swap_func,

>+	        __compar_d_fn_t cmp, void *arg)

>+{

>+  char *base_ptr = (char *) pbase;

>+  char *const end_ptr = &base_ptr[size * (total_elems - 1)];

>+  char *tmp_ptr = base_ptr;

>+#define min(x, y) ((x) < (y) ? (x) : (y))

>+  const size_t max_thresh = MAX_THRESH * size;


But I think MAX_THRESH being 4 is unfortunate.
All modern architectures want a value larger than 4 :)

Reviewed-by: Fangrui Song <maskray@google.com>


>+  char *thresh = min(end_ptr, base_ptr + max_thresh);

>+  char *run_ptr;

>+

>+  /* Find smallest element in first threshold and place it at the

>+     array's beginning.  This is the smallest array element,

>+     and the operation speeds up insertion sort's inner loop. */

>+

>+  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)

>+    if (cmp (run_ptr, tmp_ptr, arg) < 0)

>+      tmp_ptr = run_ptr;

>+

>+  if (tmp_ptr != base_ptr)

>+    do_swap (tmp_ptr, base_ptr, size, swap_func);

>+

>+  /* Insertion sort, running from left-hand-side up to right-hand-side.  */

>+

>+  run_ptr = base_ptr + size;

>+  while ((run_ptr += size) <= end_ptr)

>+    {

>+      tmp_ptr = run_ptr - size;

>+      while (cmp (run_ptr, tmp_ptr, arg) < 0)

>+        tmp_ptr -= size;

>+

>+      tmp_ptr += size;

>+      if (tmp_ptr != run_ptr)

>+        {

>+          char *trav;

>+

>+          trav = run_ptr + size;

>+          while (--trav >= run_ptr)

>+            {

>+              char c = *trav;

>+              char *hi, *lo;

>+

>+              for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)

>+                *hi = *lo;

>+              *hi = c;

>+            }


The bytewise move is a bit unfortunate and may slow down the insertion sort
quite a bit... But without allocation or code duplication I don't know a
better approach...

>+        }

>+    }

>+}

>+

> void

> _quicksort (void *const pbase, size_t total_elems, size_t size,

> 	    __compar_d_fn_t cmp, void *arg)

>@@ -272,51 +324,5 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,

>      for partitions below MAX_THRESH size. BASE_PTR points to the beginning

>      of the array to sort, and END_PTR points at the very last element in

>      the array (*not* one beyond it!). */

>-

>-#define min(x, y) ((x) < (y) ? (x) : (y))

>-

>-  {

>-    char *const end_ptr = &base_ptr[size * (total_elems - 1)];

>-    char *tmp_ptr = base_ptr;

>-    char *thresh = min(end_ptr, base_ptr + max_thresh);

>-    char *run_ptr;

>-

>-    /* Find smallest element in first threshold and place it at the

>-       array's beginning.  This is the smallest array element,

>-       and the operation speeds up insertion sort's inner loop. */

>-

>-    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)

>-      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)

>-        tmp_ptr = run_ptr;

>-

>-    if (tmp_ptr != base_ptr)

>-      do_swap (tmp_ptr, base_ptr, size, swap_func);

>-

>-    /* Insertion sort, running from left-hand-side up to right-hand-side.  */

>-

>-    run_ptr = base_ptr + size;

>-    while ((run_ptr += size) <= end_ptr)

>-      {

>-	tmp_ptr = run_ptr - size;

>-	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)

>-	  tmp_ptr -= size;

>-

>-	tmp_ptr += size;

>-        if (tmp_ptr != run_ptr)

>-          {

>-            char *trav;

>-

>-	    trav = run_ptr + size;

>-	    while (--trav >= run_ptr)

>-              {

>-                char c = *trav;

>-                char *hi, *lo;

>-

>-                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)

>-                  *hi = *lo;

>-                *hi = c;

>-              }

>-          }

>-      }

>-  }

>+  insertion_sort (pbase, total_elems, size, swap_func, cmp, arg);

> }

>-- 

>2.30.2

>
Fangrui Song via Libc-alpha Sept. 6, 2021, 8:48 p.m. | #2
On 2021-09-06, Fangrui Song wrote:
>On 2021-09-03, Adhemerval Zanella via Libc-alpha wrote:

>>---

>>stdlib/qsort.c | 100 ++++++++++++++++++++++++++-----------------------

>>1 file changed, 53 insertions(+), 47 deletions(-)

>>

>>diff --git a/stdlib/qsort.c b/stdlib/qsort.c

>>index 59458d151b..b69417dedd 100644

>>--- a/stdlib/qsort.c

>>+++ b/stdlib/qsort.c

>>@@ -150,6 +150,58 @@ typedef struct

>>      smaller partition.  This *guarantees* no more than log (total_elems)

>>      stack size is needed (actually O(1) in this case)!  */

>>

>>+static void

>>+insertion_sort (void *const pbase, size_t total_elems, size_t size,

>>+                swap_func_t swap_func,

>>+	        __compar_d_fn_t cmp, void *arg)

>>+{

>>+  char *base_ptr = (char *) pbase;

>>+  char *const end_ptr = &base_ptr[size * (total_elems - 1)];

>>+  char *tmp_ptr = base_ptr;

>>+#define min(x, y) ((x) < (y) ? (x) : (y))

>>+  const size_t max_thresh = MAX_THRESH * size;

>

>But I think MAX_THRESH being 4 is unfortunate.

>All modern architectures want a value larger than 4 :)

>

>Reviewed-by: Fangrui Song <maskray@google.com>

>

>>+  char *thresh = min(end_ptr, base_ptr + max_thresh);

>>+  char *run_ptr;

>>+

>>+  /* Find smallest element in first threshold and place it at the

>>+     array's beginning.  This is the smallest array element,

>>+     and the operation speeds up insertion sort's inner loop. */

>>+

>>+  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)

>>+    if (cmp (run_ptr, tmp_ptr, arg) < 0)

>>+      tmp_ptr = run_ptr;

>>+

>>+  if (tmp_ptr != base_ptr)

>>+    do_swap (tmp_ptr, base_ptr, size, swap_func);

>>+

>>+  /* Insertion sort, running from left-hand-side up to right-hand-side.  */

>>+

>>+  run_ptr = base_ptr + size;

>>+  while ((run_ptr += size) <= end_ptr)

>>+    {

>>+      tmp_ptr = run_ptr - size;

>>+      while (cmp (run_ptr, tmp_ptr, arg) < 0)

>>+        tmp_ptr -= size;

>>+

>>+      tmp_ptr += size;

>>+      if (tmp_ptr != run_ptr)

>>+        {

>>+          char *trav;

>>+

>>+          trav = run_ptr + size;

>>+          while (--trav >= run_ptr)

>>+            {

>>+              char c = *trav;

>>+              char *hi, *lo;

>>+

>>+              for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)

>>+                *hi = *lo;

>>+              *hi = c;

>>+            }

>

>The bytewise move is a bit unfortunate and may slow down the insertion sort

>quite a bit... But without allocation or code duplication I don't know a

>better approach...


If we want to optimize insertion sort for the common case,
perhaps we could also optimize the case where the element size is <= SWAP_GENERIC_SIZE.

Use an `unsigned char tmp[SWAP_GENERIC_SIZE];` buffer,
as you do in another patch.

There will be a bit of code bloat, though...

>

>>+        }

>>+    }

>>+}

>>+

>>void

>>_quicksort (void *const pbase, size_t total_elems, size_t size,

>>	    __compar_d_fn_t cmp, void *arg)

>>@@ -272,51 +324,5 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,

>>     for partitions below MAX_THRESH size. BASE_PTR points to the beginning

>>     of the array to sort, and END_PTR points at the very last element in

>>     the array (*not* one beyond it!). */

>>-

>>-#define min(x, y) ((x) < (y) ? (x) : (y))

>>-

>>-  {

>>-    char *const end_ptr = &base_ptr[size * (total_elems - 1)];

>>-    char *tmp_ptr = base_ptr;

>>-    char *thresh = min(end_ptr, base_ptr + max_thresh);

>>-    char *run_ptr;

>>-

>>-    /* Find smallest element in first threshold and place it at the

>>-       array's beginning.  This is the smallest array element,

>>-       and the operation speeds up insertion sort's inner loop. */

>>-

>>-    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)

>>-      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)

>>-        tmp_ptr = run_ptr;

>>-

>>-    if (tmp_ptr != base_ptr)

>>-      do_swap (tmp_ptr, base_ptr, size, swap_func);

>>-

>>-    /* Insertion sort, running from left-hand-side up to right-hand-side.  */

>>-

>>-    run_ptr = base_ptr + size;

>>-    while ((run_ptr += size) <= end_ptr)

>>-      {

>>-	tmp_ptr = run_ptr - size;

>>-	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)

>>-	  tmp_ptr -= size;

>>-

>>-	tmp_ptr += size;

>>-        if (tmp_ptr != run_ptr)

>>-          {

>>-            char *trav;

>>-

>>-	    trav = run_ptr + size;

>>-	    while (--trav >= run_ptr)

>>-              {

>>-                char c = *trav;

>>-                char *hi, *lo;

>>-

>>-                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)

>>-                  *hi = *lo;

>>-                *hi = c;

>>-              }

>>-          }

>>-      }

>>-  }

>>+  insertion_sort (pbase, total_elems, size, swap_func, cmp, arg);

>>}

>>-- 

>>2.30.2

>>

Patch

diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 59458d151b..b69417dedd 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -150,6 +150,58 @@  typedef struct
       smaller partition.  This *guarantees* no more than log (total_elems)
       stack size is needed (actually O(1) in this case)!  */
 
+static void
+insertion_sort (void *const pbase, size_t total_elems, size_t size,
+                swap_func_t swap_func,
+	        __compar_d_fn_t cmp, void *arg)
+{
+  char *base_ptr = (char *) pbase;
+  char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+  char *tmp_ptr = base_ptr;
+#define min(x, y) ((x) < (y) ? (x) : (y))
+  const size_t max_thresh = MAX_THRESH * size;
+  char *thresh = min(end_ptr, base_ptr + max_thresh);
+  char *run_ptr;
+
+  /* Find smallest element in first threshold and place it at the
+     array's beginning.  This is the smallest array element,
+     and the operation speeds up insertion sort's inner loop. */
+
+  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+    if (cmp (run_ptr, tmp_ptr, arg) < 0)
+      tmp_ptr = run_ptr;
+
+  if (tmp_ptr != base_ptr)
+    do_swap (tmp_ptr, base_ptr, size, swap_func);
+
+  /* Insertion sort, running from left-hand-side up to right-hand-side.  */
+
+  run_ptr = base_ptr + size;
+  while ((run_ptr += size) <= end_ptr)
+    {
+      tmp_ptr = run_ptr - size;
+      while (cmp (run_ptr, tmp_ptr, arg) < 0)
+        tmp_ptr -= size;
+
+      tmp_ptr += size;
+      if (tmp_ptr != run_ptr)
+        {
+          char *trav;
+
+          trav = run_ptr + size;
+          while (--trav >= run_ptr)
+            {
+              char c = *trav;
+              char *hi, *lo;
+
+              for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+                *hi = *lo;
+              *hi = c;
+            }
+        }
+    }
+}
+
 void
 _quicksort (void *const pbase, size_t total_elems, size_t size,
 	    __compar_d_fn_t cmp, void *arg)
@@ -272,51 +324,5 @@  _quicksort (void *const pbase, size_t total_elems, size_t size,
      for partitions below MAX_THRESH size. BASE_PTR points to the beginning
      of the array to sort, and END_PTR points at the very last element in
      the array (*not* one beyond it!). */
-
-#define min(x, y) ((x) < (y) ? (x) : (y))
-
-  {
-    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
-    char *tmp_ptr = base_ptr;
-    char *thresh = min(end_ptr, base_ptr + max_thresh);
-    char *run_ptr;
-
-    /* Find smallest element in first threshold and place it at the
-       array's beginning.  This is the smallest array element,
-       and the operation speeds up insertion sort's inner loop. */
-
-    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
-      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
-        tmp_ptr = run_ptr;
-
-    if (tmp_ptr != base_ptr)
-      do_swap (tmp_ptr, base_ptr, size, swap_func);
-
-    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
-
-    run_ptr = base_ptr + size;
-    while ((run_ptr += size) <= end_ptr)
-      {
-	tmp_ptr = run_ptr - size;
-	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
-	  tmp_ptr -= size;
-
-	tmp_ptr += size;
-        if (tmp_ptr != run_ptr)
-          {
-            char *trav;
-
-	    trav = run_ptr + size;
-	    while (--trav >= run_ptr)
-              {
-                char c = *trav;
-                char *hi, *lo;
-
-                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
-                  *hi = *lo;
-                *hi = c;
-              }
-          }
-      }
-  }
+  insertion_sort (pbase, total_elems, size, swap_func, cmp, arg);
 }