libc/stdlib: Rework nano-mallocr

Message ID 20200827013200.2651163-1-keithp@keithp.com
State New
Headers show
Series
  • libc/stdlib: Rework nano-mallocr
Related show

Commit Message

Corinna Vinschen via Newlib Aug. 27, 2020, 1:32 a.m.
This rework of the smaller malloc implementation offers:

 * Simpler header design. Gets rid of 'padding' by keeping chunks
   naturally aligned.

 * Simpler list walking code. Uses a different linked list walking
   pattern that allows for shorter code with fewer special cases.

 * Extend the last chunk in the heap when possible. For malloc and
   realloc, attempt to sbrk more memory onto the last chunk.

 * In-place realloc. Instead of always allocating a new chunk,
   shrink reallocations in place, returning large-enough chunks
   to the free list. When growing, attempt to merge adjacent free
   chunks or extend the heap.

 * Return cleared memory. Instead of leaving whatever data was in
   memory to the application, malloc and realloc always erase new
   storage to zero. This mitigates the impact of application bugs by
   reducing accidental information disclosure and by making
   application operation consistent when initializations are missing.

 * Use compiler to compute correct alignment constraints.

 * Remove 32-bit limitations.

Memory usage with these changes will be the same or less than the
previous version. Peak memory usage may be lower due to the modified
behavior of malloc/realloc.

Undefining 'MALLOC_OPTIMIZE_HEAP' removes the improvements from malloc
and realloc, leaving the redesigned heap structure and list-walking
code.

Here are some text size comparisons; this was built on a 32-bit ARM v5
without Thumb (the default arm-none-eabi target):

	File                   old    new   no-heap
	----                   ---    ---   ------
	nano-callocr.o          68     56     56
	nano-cfreer.o           16      0      0
	nano-freer.o           288    224    224
	nano-malignr.o         260    236    236
	nano-mallinfor.o       196    172    172
	nano-mallocr.o         348    612    440
	nano-malloptr.o          8      8      0
	nano-mallstatsr.o      201    246    246
	nano-msizer.o           24     12     12
	nano-pvallocr.o         32     32     32
	nano-reallocr.o        156    400    144
	nano-vallocr.o          24     24     24

	Total                 1621   2022   1594

old:      existing newlib code
new:	  new code with heap-improvements included
no-heap:  new code with heap-improvements excluded

Signed-off-by: Keith Packard <keithp@keithp.com>


fixup
---
 newlib/libc/stdlib/nano-mallocr.c | 874 +++++++++++++++++-------------
 1 file changed, 510 insertions(+), 364 deletions(-)

-- 
2.28.0

Patch

diff --git a/newlib/libc/stdlib/nano-mallocr.c b/newlib/libc/stdlib/nano-mallocr.c
index 6ba0eb74b..06bf2b564 100644
--- a/newlib/libc/stdlib/nano-mallocr.c
+++ b/newlib/libc/stdlib/nano-mallocr.c
@@ -34,24 +34,13 @@ 
 
 #include <stdio.h>
 #include <string.h>
+#include <stdbool.h>
 #include <errno.h>
 #include <malloc.h>
-
-#if DEBUG
-#include <assert.h>
-#else
-#define assert(x) ((void)0)
-#endif
-
-#ifndef MAX
-#define MAX(a,b) ((a) >= (b) ? (a) : (b))
-#endif
-
-#define _SBRK_R(X) _sbrk_r(X)
-
-#ifdef INTERNAL_NEWLIB
-
+#include <unistd.h>
 #include <sys/config.h>
+#include <sys/cdefs.h>
+
 #include <reent.h>
 
 #define RARG struct _reent *reent_ptr,
@@ -59,238 +48,340 @@ 
 #define RCALL reent_ptr,
 #define RONECALL reent_ptr
 
+#define MALLOC_OPTIMIZE_HEAP
+
+#if MALLOC_DEBUG
+#include <assert.h>
+#define MALLOC_LOCK do { __malloc_lock(reent_ptr); __malloc_validate(); } while(0)
+#define MALLOC_UNLOCK do { __malloc_validate(reent_ptr); __malloc_unlock(); } while(0)
+void __malloc_validate(void);
+#else
 #define MALLOC_LOCK __malloc_lock(reent_ptr)
 #define MALLOC_UNLOCK __malloc_unlock(reent_ptr)
+#define assert(x) ((void)0)
+#endif
 
 #define RERRNO reent_ptr->_errno
 
-#define nano_malloc		_malloc_r
-#define nano_free		_free_r
-#define nano_realloc		_realloc_r
-#define nano_memalign		_memalign_r
-#define nano_valloc		_valloc_r
-#define nano_pvalloc		_pvalloc_r
-#define nano_calloc		_calloc_r
-#define nano_cfree		_cfree_r
-#define nano_malloc_usable_size _malloc_usable_size_r
-#define nano_malloc_stats	_malloc_stats_r
-#define nano_mallinfo		_mallinfo_r
-#define nano_mallopt		_mallopt_r
-
-#else /* ! INTERNAL_NEWLIB */
-
-#define RARG
-#define RONEARG
-#define RCALL
-#define RONECALL
-#define MALLOC_LOCK
-#define MALLOC_UNLOCK
-#define RERRNO errno
-
-#define nano_malloc		malloc
-#define nano_free		free
-#define nano_realloc		realloc
-#define nano_memalign		memalign
-#define nano_valloc		valloc
-#define nano_pvalloc		pvalloc
-#define nano_calloc		calloc
-#define nano_cfree		cfree
-#define nano_malloc_usable_size malloc_usable_size
-#define nano_malloc_stats	malloc_stats
-#define nano_mallinfo		mallinfo
-#define nano_mallopt		mallopt
-#endif /* ! INTERNAL_NEWLIB */
-
-/* Redefine names to avoid conflict with user names */
-#define free_list __malloc_free_list
-#define sbrk_start __malloc_sbrk_start
-#define current_mallinfo __malloc_current_mallinfo
+#define malloc			_malloc_r
+#define free			_free_r
+#define realloc			_realloc_r
+#define memalign		_memalign_r
+#define valloc			_valloc_r
+#define pvalloc			_pvalloc_r
+#define calloc			_calloc_r
+#define cfree			_cfree_r
+#define malloc_usable_size	_malloc_usable_size_r
+#define malloc_stats		_malloc_stats_r
+#define mallinfo(...)		_mallinfo_r(__VA_ARGS__)
+#define mallopt			_mallopt_r
+#define fprintf			_fprintf_r
+#undef stderr
+#define stderr			_stderr_r(RONECALL)
+#define sbrk(x)			_sbrk_r(RCALL x)
+
+#ifdef __strong_reference
+#define make_reference(a,b) 	__strong_reference(a,b)
+#endif
+
+#ifndef MAX
+#define MAX(a,b) ((a) >= (b) ? (a) : (b))
+#endif
 
 #define ALIGN_TO(size, align) \
     (((size) + (align) -1L) & ~((align) -1L))
 
-/* Alignment of allocated block */
-#define MALLOC_ALIGN (8U)
-#define CHUNK_ALIGN (sizeof(void*))
-#define MALLOC_PADDING ((MAX(MALLOC_ALIGN, CHUNK_ALIGN)) - CHUNK_ALIGN)
+#define ALIGN_PTR(ptr, align)	(void *) ALIGN_TO((uintptr_t) ptr, align)
 
-/* as well as the minimal allocation size
- * to hold a free pointer */
-#define MALLOC_MINSIZE (sizeof(void *))
-#define MALLOC_PAGE_ALIGN (0x1000)
-#define MAX_ALLOC_SIZE (0x80000000U)
+typedef struct {
+    char c;
+    union {
+	void *p;
+	double d;
+	long long ll;
+	size_t s;
+    } u;
+} align_chunk_t;
 
-typedef size_t malloc_size_t;
+typedef struct {
+    char c;
+    size_t s;
+} align_head_t;
 
 typedef struct malloc_chunk
 {
     /*          --------------------------------------
      *   chunk->| size                               |
-     *          --------------------------------------
-     *          | Padding for alignment              |
-     *          | This includes padding inserted by  |
-     *          | the compiler (to align fields) and |
-     *          | explicit padding inserted by this  |
-     *          | implementation. If any explicit    |
-     *          | padding is being used then the     |
-     *          | sizeof (size) bytes at             |
-     *          | mem_ptr - CHUNK_OFFSET must be     |
-     *          | initialized with the negative      |
-     *          | offset to size.                    |
-     *          --------------------------------------
      * mem_ptr->| When allocated: data               |
      *          | When freed: pointer to next free   |
      *          | chunk                              |
      *          --------------------------------------
+     *
+     * mem_ptr is aligned to MALLOC_CHUNK_ALIGN. That means that
+     * the chunk may not be aligned to MALLOC_CHUNK_ALIGN. But
+     * it will be aligned to MALLOC_HEAD_ALIGN.
+     *
+     * size is set so that a chunk starting at chunk+size will be
+     * aligned correctly
      */
-    /* size of the allocated payload area, including size before
-       CHUNK_OFFSET */
-    long size;
 
-    /* since here, the memory is either the next free block, or data load */
+    /* size of the allocated payload area */
+    size_t size;
+
+    /* pointer to next chunk */
     struct malloc_chunk * next;
-}chunk;
+} chunk_t;
 
+/* Alignment of allocated chunk. Compute the alignment required from a
+ * range of types */
+#define MALLOC_CHUNK_ALIGN	(offsetof(align_chunk_t, u))
 
-#define CHUNK_OFFSET ((malloc_size_t)(&(((struct malloc_chunk *)0)->next)))
+/* Alignment of the header. Never larger than MALLOC_CHUNK_ALIGN */
+#define MALLOC_HEAD_ALIGN	(offsetof(align_head_t, s))
 
-/* size of smallest possible chunk. A memory piece smaller than this size
- * won't be able to create a chunk */
-#define MALLOC_MINCHUNK (CHUNK_OFFSET + MALLOC_PADDING + MALLOC_MINSIZE)
+/* Size of malloc header. Keep it aligned. */
+#define MALLOC_HEAD 		ALIGN_TO(sizeof(size_t), MALLOC_HEAD_ALIGN)
+
+/* nominal "page size" */
+#define MALLOC_PAGE_ALIGN 	(0x1000)
+
+/* Minimum allocation size */
+#define MALLOC_MINSIZE		ALIGN_TO(sizeof(chunk_t), MALLOC_HEAD_ALIGN)
+
+/* Maximum allocation size */
+#define MALLOC_MAXSIZE 		(__SIZE_MAX__ - MALLOC_HEAD)
 
 /* Forward data declarations */
-extern chunk * free_list;
-extern char * sbrk_start;
-extern struct mallinfo current_mallinfo;
+extern chunk_t * __malloc_free_list;
+extern char * __malloc_sbrk_start;
+extern char * __malloc_sbrk_top;
 
 /* Forward function declarations */
-extern void * nano_malloc(RARG malloc_size_t);
-extern void nano_free (RARG void * free_p);
-extern void nano_cfree(RARG void * ptr);
-extern void * nano_calloc(RARG malloc_size_t n, malloc_size_t elem);
-extern void nano_malloc_stats(RONEARG);
-extern malloc_size_t nano_malloc_usable_size(RARG void * ptr);
-extern void * nano_realloc(RARG void * ptr, malloc_size_t size);
-extern void * nano_memalign(RARG size_t align, size_t s);
-extern int nano_mallopt(RARG int parameter_number, int parameter_value);
-extern void * nano_valloc(RARG size_t s);
-extern void * nano_pvalloc(RARG size_t s);
-
-static inline chunk * get_chunk_from_ptr(void * ptr)
+void * malloc(RARG size_t);
+void free (RARG void * free_p);
+void cfree(RARG void * ptr);
+void * calloc(RARG size_t n, size_t elem);
+void malloc_stats(RONEARG);
+size_t malloc_usable_size(RARG void * ptr);
+void * realloc(RARG void * ptr, size_t size);
+void * memalign(RARG size_t align, size_t s);
+int mallopt(RARG int parameter_number, int parameter_value);
+void * valloc(RARG size_t s);
+void * pvalloc(RARG size_t s);
+void * __malloc_sbrk_aligned(RARG size_t s);
+bool __malloc_grow_chunk(RARG chunk_t *c, size_t new_size);
+
+/* Work around compiler optimizing away stores to 'size' field before
+ * call to free.
+ */
+void __malloc_free(RARG void * free_p);
+
+/* convert storage pointer to chunk */
+static inline chunk_t *
+ptr_to_chunk(void * ptr)
 {
-    /* Assume that there is no explicit padding in the
-       chunk, and that the chunk starts at ptr - CHUNK_OFFSET.  */
-    chunk * c = (chunk *)((char *)ptr - CHUNK_OFFSET);
-
-    /* c->size being negative indicates that there is explicit padding in
-       the chunk. In which case, c->size is currently the negative offset to
-       the true size.  */
-    if (c->size < 0) c = (chunk *)((char *)c + c->size);
-    return c;
+    return (chunk_t *) ((char *) ptr - MALLOC_HEAD);
+}
+
+/* convert chunk to storage pointer */
+static inline void *
+chunk_to_ptr(chunk_t *c)
+{
+    return (char *) c + MALLOC_HEAD;
+}
+
+/* end of chunk -- address of first byte past chunk storage */
+static inline void *
+chunk_end(chunk_t *c)
+{
+    return (char *) c + c->size;
+}
+
+/* chunk size needed to hold 'malloc_size' bytes */
+static inline size_t
+chunk_size(size_t malloc_size)
+{
+    /* Keep all blocks aligned */
+    malloc_size = ALIGN_TO(malloc_size, MALLOC_CHUNK_ALIGN);
+
+    /* Add space for header */
+    malloc_size += MALLOC_HEAD;
+
+    /* fill the gap between chunks */
+    malloc_size += (MALLOC_CHUNK_ALIGN - MALLOC_HEAD_ALIGN);
+
+    /* Make sure the requested size is big enough to hold a free chunk */
+    malloc_size = MAX(MALLOC_MINSIZE, malloc_size);
+    return malloc_size;
+}
+
+/* available storage in chunk */
+static inline size_t
+chunk_usable(chunk_t *c)
+{
+    return c->size - MALLOC_HEAD;
+}
+
+/* assign 'size' to the specified chunk and return it to the free
+ * pool */
+static inline void
+make_free_chunk(RARG chunk_t *c, size_t size)
+{
+    c->size = size;
+    __malloc_free(RCALL chunk_to_ptr(c));
 }
 
 #ifdef DEFINE_MALLOC
 /* List list header of free blocks */
-chunk * free_list = NULL;
+chunk_t * __malloc_free_list;
 
 /* Starting point of memory allocated from system */
-char * sbrk_start = NULL;
+char * __malloc_sbrk_start;
+char * __malloc_sbrk_top;
 
-/** Function sbrk_aligned
+/** Function __malloc_sbrk_aligned
   * Algorithm:
-  *   Use sbrk() to obtain more memory and ensure it is CHUNK_ALIGN aligned
-  *   Optimise for the case that it is already aligned - only ask for extra
-  *   padding after we know we need it
+  *   Use sbrk() to obtain more memory and ensure the storage is
+  *   MALLOC_CHUNK_ALIGN aligned. Optimise for the case that it is
+  *   already aligned - only ask for extra padding after we know we
+  *   need it
   */
-static void* sbrk_aligned(RARG malloc_size_t s)
+void* __malloc_sbrk_aligned(RARG size_t s)
 {
     char *p, *align_p;
 
-    if (sbrk_start == NULL) sbrk_start = _SBRK_R(RCALL 0);
+    if (__malloc_sbrk_start == NULL)
+	__malloc_sbrk_start = sbrk(0);
 
-    p = _SBRK_R(RCALL s);
+    p = sbrk(s);
 
     /* sbrk returns -1 if fail to allocate */
     if (p == (void *)-1)
         return p;
 
-    align_p = (char*)ALIGN_TO((unsigned long)p, CHUNK_ALIGN);
+    __malloc_sbrk_top = p + s;
+
+    /* Adjust returned space so that the storage area
+     * is MALLOC_CHUNK_ALIGN aligned and the head is
+     * MALLOC_HEAD_ALIGN aligned.
+     */
+    align_p = ALIGN_PTR(p + MALLOC_HEAD, MALLOC_CHUNK_ALIGN) - MALLOC_HEAD;
+
     if (align_p != p)
     {
-        /* p is not aligned, ask for a few more bytes so that we have s
-         * bytes reserved from align_p. */
-        p = _SBRK_R(RCALL align_p - p);
-        if (p == (void *)-1)
-            return p;
+        /* p is not aligned, ask for a few more bytes so that we have
+         * s bytes reserved from align_p. This should only occur for
+         * the first sbrk in a chunk of memory as all others should be
+         * aligned to the right value as chunk sizes are selected to
+         * make them abut in memory
+	 */
+	size_t adjust = align_p - p;
+        char *extra = sbrk(adjust);
+        if (extra != p + s)
+            return (void *) -1;
+	__malloc_sbrk_top = extra + adjust;
     }
+
     return align_p;
 }
 
-/** Function nano_malloc
+#ifdef MALLOC_OPTIMIZE_HEAP
+bool
+__malloc_grow_chunk(RARG chunk_t *c, size_t new_size)
+{
+    char *chunk_e = chunk_end(c);
+
+    if (chunk_e != __malloc_sbrk_top)
+	return false;
+    size_t add_size = MAX(MALLOC_MINSIZE, new_size - c->size);
+
+    /* Ask for the extra memory needed */
+    char *heap = __malloc_sbrk_aligned(RCALL add_size);
+
+    /* Check if we got what we wanted */
+    if (heap == chunk_e)
+    {
+	/* Set size and return */
+	c->size += add_size;
+	return true;
+    }
+
+    if (heap != (char *) -1)
+    {
+	/* sbrk returned unexpected memory, free it */
+	make_free_chunk(RCALL (chunk_t *) heap, add_size);
+    }
+    return false;
+}
+#endif
+
+/** Function malloc
   * Algorithm:
   *   Walk through the free list to find the first match. If fails to find
   *   one, call sbrk to allocate a new chunk.
   */
-void * nano_malloc(RARG malloc_size_t s)
+void * malloc(RARG size_t s)
 {
-    chunk *p, *r;
-    char * ptr, * align_ptr;
-    int offset;
-
-    malloc_size_t alloc_size;
+    chunk_t **p, *r;
+    char * ptr;
+    size_t alloc_size;
 
-    alloc_size = ALIGN_TO(s, CHUNK_ALIGN); /* size of aligned data load */
-    alloc_size += MALLOC_PADDING; /* padding */
-    alloc_size += CHUNK_OFFSET; /* size of chunk head */
-    alloc_size = MAX(alloc_size, MALLOC_MINCHUNK);
-
-    if (alloc_size >= MAX_ALLOC_SIZE || alloc_size < s)
+    if (s > MALLOC_MAXSIZE)
     {
         RERRNO = ENOMEM;
         return NULL;
     }
 
-    MALLOC_LOCK;
+    alloc_size = chunk_size(s);
 
-    p = free_list;
-    r = p;
+    MALLOC_LOCK;
 
-    while (r)
+    for (p = &__malloc_free_list; (r = *p) != NULL; p = &r->next)
     {
-        int rem = r->size - alloc_size;
-        if (rem >= 0)
+        if (r->size >= alloc_size)
         {
-            if (rem >= MALLOC_MINCHUNK)
+	    size_t rem = r->size - alloc_size;
+
+            if (rem >= MALLOC_MINSIZE)
             {
-                /* Find a chunk that much larger than required size, break
-                * it into two chunks and return the second one */
-                r->size = rem;
-                r = (chunk *)((char *)r + rem);
+                /* Find a chunk_t that much larger than required size, break
+		 * it into two chunks and return the first one
+		 */
+
+		chunk_t *s = (chunk_t *)((char *)r + alloc_size);
+                s->size = rem;
+		s->next = r->next;
+		*p = s;
+
                 r->size = alloc_size;
             }
-            /* Find a chunk that is exactly the size or slightly bigger
-             * than requested size, just return this chunk */
-            else if (p == r)
-            {
-                /* Now it implies p==r==free_list. Move the free_list
-                 * to next chunk */
-                free_list = r->next;
-            }
-            else
-            {
-                /* Normal case. Remove it from free_list */
-                p->next = r->next;
-            }
+	    else
+	    {
+		/* Find a chunk_t that is exactly the size or slightly bigger
+		 * than requested size, just return this chunk_t
+		 */
+		*p = r->next;
+	    }
             break;
         }
-        p=r;
-        r=r->next;
+#ifdef MALLOC_OPTIMIZE_HEAP
+	if (!r->next && __malloc_grow_chunk(RCALL r, alloc_size))
+	{
+	    /* Grow the last chunk in memory to the requested size,
+	     * just return it
+	     */
+	    *p = r->next;
+	    break;
+	}
+#endif
     }
 
-    /* Failed to find a appropriate chunk. Ask for more memory */
+    MALLOC_UNLOCK;
+
+    /* Failed to find an appropriate chunk_t. Ask for more memory */
     if (r == NULL)
     {
-        r = sbrk_aligned(RCALL alloc_size);
+        r = __malloc_sbrk_aligned(RCALL alloc_size);
 
         /* sbrk returns -1 if fail to allocate */
         if (r == (void *)-1)
@@ -301,222 +392,274 @@  void * nano_malloc(RARG malloc_size_t s)
         }
         r->size = alloc_size;
     }
-    MALLOC_UNLOCK;
 
-    ptr = (char *)r + CHUNK_OFFSET;
+    ptr = (char *)r + MALLOC_HEAD;
 
-    align_ptr = (char *)ALIGN_TO((unsigned long)ptr, MALLOC_ALIGN);
-    offset = align_ptr - ptr;
+    memset(ptr, '\0', alloc_size - MALLOC_HEAD);
 
-    if (offset)
-    {
-        /* Initialize sizeof (malloc_chunk.size) bytes at
-           align_ptr - CHUNK_OFFSET with negative offset to the
-           size field (at the start of the chunk).
-
-           The negative offset to size from align_ptr - CHUNK_OFFSET is
-           the size of any remaining padding minus CHUNK_OFFSET.  This is
-           equivalent to the total size of the padding, because the size of
-           any remaining padding is the total size of the padding minus
-           CHUNK_OFFSET.
-
-           Note that the size of the padding must be at least CHUNK_OFFSET.
+    return ptr;
+}
 
-           The rest of the padding is not initialized.  */
-        *(long *)((char *)r + offset) = -offset;
+#if MALLOC_DEBUG
+void
+__malloc_validate(void)
+{
+    chunk_t *r;
+
+    for (r = __malloc_free_list; r; r = r->next) {
+	assert (ALIGN_PTR(chunk_to_ptr(r), MALLOC_CHUNK_ALIGN) == chunk_to_ptr(r));
+	assert (ALIGN_PTR(r, MALLOC_HEAD_ALIGN) == r);
+	assert (r->size >= MALLOC_MINSIZE);
+	assert (ALIGN_TO(r->size, MALLOC_HEAD_ALIGN) == r->size);
+	assert (r->next == NULL || (char *) r + r->size < (char *) r->next);
     }
-
-    assert(align_ptr + size <= (char *)r + alloc_size);
-    return align_ptr;
 }
+#endif
+
 #endif /* DEFINE_MALLOC */
 
 #ifdef DEFINE_FREE
-#define MALLOC_CHECK_DOUBLE_FREE
 
-/** Function nano_free
+/** Function free
   * Implementation of libc free.
   * Algorithm:
-  *  Maintain a global free chunk single link list, headed by global
-  *  variable free_list.
-  *  When free, insert the to-be-freed chunk into free list. The place to
+  *  Maintain a global free chunk_t single link list, headed by global
+  *  variable __malloc_free_list.
+  *  When free, insert the to-be-freed chunk_t into free list. The place to
   *  insert should make sure all chunks are sorted by address from low to
   *  high.  Then merge with neighbor chunks if adjacent.
   */
-void nano_free (RARG void * free_p)
+void free (RARG void * free_p)
 {
-    chunk * p_to_free;
-    chunk * p, * q;
+    chunk_t * p_to_free;
+    chunk_t ** p, * r;
 
     if (free_p == NULL) return;
 
-    p_to_free = get_chunk_from_ptr(free_p);
+    p_to_free = ptr_to_chunk(free_p);
+    p_to_free->next = NULL;
 
     MALLOC_LOCK;
-    if (free_list == NULL)
-    {
-        /* Set first free list element */
-        p_to_free->next = free_list;
-        free_list = p_to_free;
-        MALLOC_UNLOCK;
-        return;
-    }
 
-    if (p_to_free < free_list)
+    for (p = &__malloc_free_list; (r = *p) != NULL; p = &r->next)
     {
-        if ((char *)p_to_free + p_to_free->size == (char *)free_list)
-        {
-            /* Chunk to free is just before the first element of
-             * free list  */
-            p_to_free->size += free_list->size;
-            p_to_free->next = free_list->next;
-        }
-        else
-        {
-            /* Insert before current free_list */
-            p_to_free->next = free_list;
-        }
-        free_list = p_to_free;
-        MALLOC_UNLOCK;
-        return;
-    }
+	/* Insert in address order */
+	if (p_to_free < r)
+	    break;
+
+	/* Merge blocks together */
+	if (chunk_end(r) == p_to_free)
+	{
+	    r->size += p_to_free->size;
+	    p_to_free = r;
+	    r = r->next;
+	    goto no_insert;
+	}
+
+	/* Check for double free */
+	if (p_to_free == r)
+	{
+	    RERRNO = ENOMEM;
+	    MALLOC_UNLOCK;
+	    return;
+	}
 
-    q = free_list;
-    /* Walk through the free list to find the place for insert. */
-    do
-    {
-        p = q;
-        q = q->next;
-    } while (q && q <= p_to_free);
+    }
+    p_to_free->next = r;
+    *p = p_to_free;
 
-    /* Now p <= p_to_free and either q == NULL or q > p_to_free
-     * Try to merge with chunks immediately before/after it. */
+no_insert:
 
-    if ((char *)p + p->size == (char *)p_to_free)
-    {
-        /* Chunk to be freed is adjacent
-         * to a free chunk before it */
-        p->size += p_to_free->size;
-        /* If the merged chunk is also adjacent
-         * to the chunk after it, merge again */
-        if ((char *)p + p->size == (char *) q)
-        {
-            p->size += q->size;
-            p->next = q->next;
-        }
-    }
-#ifdef MALLOC_CHECK_DOUBLE_FREE
-    else if ((char *)p + p->size > (char *)p_to_free)
-    {
-        /* Report double free fault */
-        RERRNO = ENOMEM;
-        MALLOC_UNLOCK;
-        return;
-    }
-#endif
-    else if ((char *)p_to_free + p_to_free->size == (char *) q)
-    {
-        /* Chunk to be freed is adjacent
-         * to a free chunk after it */
-        p_to_free->size += q->size;
-        p_to_free->next = q->next;
-        p->next = p_to_free;
-    }
-    else
+    /* Merge blocks together */
+    if (chunk_end(p_to_free) == r)
     {
-        /* Not adjacent to any chunk. Just insert it. Resulting
-         * a fragment. */
-        p_to_free->next = q;
-        p->next = p_to_free;
+	p_to_free->size += r->size;
+	p_to_free->next = r->next;
     }
+
     MALLOC_UNLOCK;
 }
+#ifdef make_reference
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wmissing-attributes"
+#endif
+make_reference(free, __malloc_free);
+make_reference(free, cfree);
+#pragma GCC diagnostic pop
+#else
+void __malloc_free(RARG void * free_p)
+{
+    free(RCALL free_p);
+}
+#endif /* make_reference */
 #endif /* DEFINE_FREE */
 
 #ifdef DEFINE_CFREE
-void nano_cfree(RARG void * ptr)
+#ifndef make_reference
+void cfree(RARG void * ptr)
 {
-    nano_free(RCALL ptr);
+    free(RCALL ptr);
 }
+#endif
 #endif /* DEFINE_CFREE */
 
 #ifdef DEFINE_CALLOC
-/* Function nano_calloc
- * Implement calloc simply by calling malloc and set zero */
-void * nano_calloc(RARG malloc_size_t n, malloc_size_t elem)
+
+/* Function calloc
+ *
+ * Implement calloc by multiplying sizes (with overflow check) and
+ * calling malloc (which sets to zero)
+ */
+
+void * calloc(RARG size_t n, size_t elem)
 {
-    malloc_size_t bytes;
-    void * mem;
+    size_t bytes;
 
     if (__builtin_mul_overflow (n, elem, &bytes))
     {
         RERRNO = ENOMEM;
         return NULL;
     }
-    mem = nano_malloc(RCALL bytes);
-    if (mem != NULL) memset(mem, 0, bytes);
-    return mem;
+    return malloc(RCALL bytes);
 }
 #endif /* DEFINE_CALLOC */
 
 #ifdef DEFINE_REALLOC
-/* Function nano_realloc
- * Implement realloc by malloc + memcpy */
-void * nano_realloc(RARG void * ptr, malloc_size_t size)
+
+/* Function realloc
+ *
+ * Implement either by merging adjacent free memory
+ * or by calling malloc/memcpy
+ */
+void * realloc(RARG void * ptr, size_t size)
 {
     void * mem;
-    chunk * p_to_realloc;
-    malloc_size_t old_size;
 
-    if (ptr == NULL) return nano_malloc(RCALL size);
+    if (ptr == NULL)
+	return malloc(RCALL size);
 
     if (size == 0)
     {
-        nano_free(RCALL ptr);
+        free(RCALL ptr);
         return NULL;
     }
 
-    old_size = nano_malloc_usable_size(RCALL ptr);
-    if (size <= old_size && (old_size >> 1) < size)
-      return ptr;
+    chunk_t *p_to_realloc = ptr_to_chunk(ptr);
+
+#ifdef MALLOC_OPTIMIZE_HEAP
+    size_t old_size = p_to_realloc->size;
+    size_t new_size = chunk_size(size);
 
-    mem = nano_malloc(RCALL size);
-    if (mem != NULL)
+    /* See if we can avoid allocating new memory
+     * when increasing the size
+     */
+    if (new_size > old_size)
     {
-	if (old_size > size)
-	    old_size = size;
-        memcpy(mem, ptr, old_size);
-        nano_free(RCALL ptr);
+	void *chunk_e = chunk_end(p_to_realloc);
+
+	MALLOC_LOCK;
+
+	if (__malloc_grow_chunk(RCALL p_to_realloc, new_size))
+	{
+	    /* clear new memory */
+	    memset(chunk_e, '\0', new_size - old_size);
+	    /* adjust chunk_t size */
+	    old_size = new_size;
+	}
+	else
+	{
+	    chunk_t **p, *r;
+
+	    /* Check to see if there's a chunk_t of free space just past
+	     * the current block, merge it in in case that's useful
+	     */
+	    for (p = &__malloc_free_list; (r = *p) != NULL; p = &r->next)
+	    {
+		if (r == chunk_e)
+		{
+		    size_t r_size = r->size;
+
+		    /* remove R from the free list */
+		    *p = r->next;
+
+		    /* clear the memory from r */
+		    memset(r, '\0', r_size);
+
+		    /* add its size to our block */
+		    old_size += r_size;
+		    p_to_realloc->size = old_size;
+		    break;
+		}
+		if (p_to_realloc < r)
+		    break;
+	    }
+	}
+
+	MALLOC_UNLOCK;
+    }
+
+    if (new_size <= old_size)
+    {
+	size_t extra = old_size - new_size;
+
+	/* If there's enough space left over, split it out
+	 * and free it
+	 */
+	if (extra >= MALLOC_MINSIZE) {
+	    p_to_realloc->size = new_size;
+	    make_free_chunk(RCALL chunk_end(p_to_realloc), extra);
+	}
+	return ptr;
     }
+    old_size -= MALLOC_HEAD;
+#else
+    size_t old_size = malloc_usable_size(RCALL ptr);
+    if (size <= old_size && (old_size >> 1) < size)
+	    return ptr;
+    if (old_size > size)
+	    old_size = size;
+#endif
+
+    /* No short cuts, allocate new memory and copy */
+
+    mem = malloc(RCALL size);
+    memcpy(mem, ptr, old_size);
+    free(RCALL ptr);
+
     return mem;
 }
 #endif /* DEFINE_REALLOC */
 
 #ifdef DEFINE_MALLINFO
-struct mallinfo current_mallinfo={0,0,0,0,0,0,0,0,0,0};
 
-struct mallinfo nano_mallinfo(RONEARG)
+struct mallinfo mallinfo(RONEARG)
 {
     char * sbrk_now;
-    chunk * pf;
+    chunk_t * pf;
     size_t free_size = 0;
     size_t total_size;
+    int ordblks = 0;
+    struct mallinfo current_mallinfo;
 
     MALLOC_LOCK;
 
-    if (sbrk_start == NULL) total_size = 0;
+    if (__malloc_sbrk_start == NULL) total_size = 0;
     else {
-        sbrk_now = _SBRK_R(RCALL 0);
+        sbrk_now = __malloc_sbrk_top;
 
-        if (sbrk_now == (void *)-1)
+        if (sbrk_now == NULL)
             total_size = (size_t)-1;
         else
-            total_size = (size_t) (sbrk_now - sbrk_start);
+            total_size = (size_t) (sbrk_now - __malloc_sbrk_start);
     }
 
-    for (pf = free_list; pf; pf = pf->next)
+    for (pf = __malloc_free_list; pf; pf = pf->next) {
+	ordblks++;
         free_size += pf->size;
+    }
 
+    current_mallinfo.ordblks = ordblks;
     current_mallinfo.arena = total_size;
     current_mallinfo.fordblks = free_size;
     current_mallinfo.uordblks = total_size - free_size;
@@ -527,36 +670,32 @@  struct mallinfo nano_mallinfo(RONEARG)
 #endif /* DEFINE_MALLINFO */
 
 #ifdef DEFINE_MALLOC_STATS
-void nano_malloc_stats(RONEARG)
+
+void malloc_stats(RONEARG)
 {
-    nano_mallinfo(RONECALL);
-    fiprintf(stderr, "max system bytes = %10u\n",
-             current_mallinfo.arena);
-    fiprintf(stderr, "system bytes     = %10u\n",
-             current_mallinfo.arena);
-    fiprintf(stderr, "in use bytes     = %10u\n",
-             current_mallinfo.uordblks);
+    struct mallinfo current_mallinfo;
+
+    current_mallinfo = mallinfo(RONECALL);
+    fprintf(RCALL stderr, "max system bytes = %10lu\n",
+            (long) current_mallinfo.arena);
+    fprintf(RCALL stderr, "system bytes     = %10lu\n",
+            (long) current_mallinfo.arena);
+    fprintf(RCALL stderr, "in use bytes     = %10lu\n",
+            (long) current_mallinfo.uordblks);
+    fprintf(RCALL stderr, "free blocks      = %10lu\n",
+	    (long) current_mallinfo.ordblks);
 }
 #endif /* DEFINE_MALLOC_STATS */
 
 #ifdef DEFINE_MALLOC_USABLE_SIZE
-malloc_size_t nano_malloc_usable_size(RARG void * ptr)
+size_t malloc_usable_size(RARG void * ptr)
 {
-    chunk * c = (chunk *)((char *)ptr - CHUNK_OFFSET);
-    int size_or_offset = c->size;
-
-    if (size_or_offset < 0)
-    {
-        /* Padding is used. Excluding the padding size */
-        c = (chunk *)((char *)c + c->size);
-        return c->size - CHUNK_OFFSET + size_or_offset;
-    }
-    return c->size - CHUNK_OFFSET;
+    return chunk_usable(ptr_to_chunk(ptr));
 }
 #endif /* DEFINE_MALLOC_USABLE_SIZE */
 
 #ifdef DEFINE_MEMALIGN
-/* Function nano_memalign
+/* Function memalign
  * Allocate memory block aligned at specific boundary.
  *   align: required alignment. Must be power of 2. Return NULL
  *          if not power of 2. Undefined behavior is bigger than
@@ -568,80 +707,87 @@  malloc_size_t nano_malloc_usable_size(RARG void * ptr)
  *            Record the offset of align pointer and original pointer
  *            in the padding area.
  */
-void * nano_memalign(RARG size_t align, size_t s)
+void * memalign(RARG size_t align, size_t s)
 {
-    chunk * chunk_p;
-    malloc_size_t size_allocated, offset, ma_size, size_with_padding;
+    chunk_t * chunk_p;
+    size_t offset, size_with_padding;
     char * allocated, * aligned_p;
 
     /* Return NULL if align isn't power of 2 */
     if ((align & (align-1)) != 0) return NULL;
 
-    align = MAX(align, MALLOC_ALIGN);
-    ma_size = ALIGN_TO(MAX(s, MALLOC_MINSIZE), CHUNK_ALIGN);
-    size_with_padding = ma_size + align - MALLOC_ALIGN;
+    s = ALIGN_TO(MAX(s,1), MALLOC_CHUNK_ALIGN);
+    align = MAX(align, MALLOC_MINSIZE);
 
-    allocated = nano_malloc(RCALL size_with_padding);
+    /* Make sure there's space to align the allocation and split
+     * off chunk_t from the front
+     */
+    size_with_padding = s + align + MALLOC_MINSIZE;
+
+    allocated = malloc(RCALL size_with_padding);
     if (allocated == NULL) return NULL;
 
-    chunk_p = get_chunk_from_ptr(allocated);
-    aligned_p = (char *)ALIGN_TO(
-                  (unsigned long)((char *)chunk_p + CHUNK_OFFSET),
-                  (unsigned long)align);
-    offset = aligned_p - ((char *)chunk_p + CHUNK_OFFSET);
+    chunk_p = ptr_to_chunk(allocated);
+
+    aligned_p = ALIGN_PTR(allocated, align);
 
+    offset = aligned_p - allocated;
+
+    /* Split off the front piece if necessary */
     if (offset)
     {
-        if (offset >= MALLOC_MINCHUNK)
-        {
-            /* Padding is too large, free it */
-            chunk * front_chunk = chunk_p;
-            chunk_p = (chunk *)((char *)chunk_p + offset);
-            chunk_p->size = front_chunk->size - offset;
-            front_chunk->size = offset;
-            nano_free(RCALL (char *)front_chunk + CHUNK_OFFSET);
-        }
-        else
-        {
-            /* Padding is used. Need to set a jump offset for aligned pointer
-            * to get back to chunk head */
-            assert(offset >= sizeof(int));
-            *(long *)((char *)chunk_p + offset) = -offset;
-        }
+	if (offset < MALLOC_MINSIZE) {
+	    aligned_p += align;
+	    offset += align;
+	}
+
+	chunk_t *new_chunk_p = ptr_to_chunk(aligned_p);
+	new_chunk_p->size = chunk_p->size - offset;
+
+	make_free_chunk(RCALL chunk_p, offset);
+
+	chunk_p = new_chunk_p;
     }
 
-    size_allocated = chunk_p->size;
-    if ((char *)chunk_p + size_allocated >
-         (aligned_p + ma_size + MALLOC_MINCHUNK))
+    offset = chunk_p->size - chunk_size(s);
+
+    /* Split off the back piece if large enough */
+    if (offset >= MALLOC_MINSIZE)
     {
-        /* allocated much more than what's required for padding, free
-         * tail part */
-        chunk * tail_chunk = (chunk *)(aligned_p + ma_size);
-        chunk_p->size = aligned_p + ma_size - (char *)chunk_p;
-        tail_chunk->size = size_allocated - chunk_p->size;
-        nano_free(RCALL (char *)tail_chunk + CHUNK_OFFSET);
+	chunk_p->size -= offset;
+
+	make_free_chunk(RCALL (chunk_t *) chunk_end(chunk_p), offset);
     }
     return aligned_p;
 }
+#ifdef make_reference
+make_reference(memalign, aligned_alloc);
+#else
+void *
+aligned_alloc(RARG size_t align, size_t s)
+{
+    return memalign(RCALL align, s);
+}
+#endif
 #endif /* DEFINE_MEMALIGN */
 
 #ifdef DEFINE_MALLOPT
-int nano_mallopt(RARG int parameter_number, int parameter_value)
+int mallopt(RARG int parameter_number, int parameter_value)
 {
     return 0;
 }
 #endif /* DEFINE_MALLOPT */
 
 #ifdef DEFINE_VALLOC
-void * nano_valloc(RARG size_t s)
+void * valloc(RARG size_t s)
 {
-    return nano_memalign(RCALL MALLOC_PAGE_ALIGN, s);
+    return memalign(RCALL MALLOC_PAGE_ALIGN, s);
 }
 #endif /* DEFINE_VALLOC */
 
 #ifdef DEFINE_PVALLOC
-void * nano_pvalloc(RARG size_t s)
+void * pvalloc(RARG size_t s)
 {
-    return nano_valloc(RCALL ALIGN_TO(s, MALLOC_PAGE_ALIGN));
+    return valloc(RCALL ALIGN_TO(s, MALLOC_PAGE_ALIGN));
 }
 #endif /* DEFINE_PVALLOC */