From f2c4c1d4e811964ff9fc87101f0804cf10f9789e Mon Sep 17 00:00:00 2001
From: "rneugeba@wyvis.research" <rneugeba@wyvis.research>
Date: Mon, 6 Oct 2003 11:26:01 +0000
Subject: bitkeeper revision 1.478.1.1 (3f815149k7sE-z_IK6MG5eHi34m-Qg)

Minimal guest OS (based on some old code from Keir)
---
 .bk-to-hg              |    8 +
 .hg-to-bk              |    7 +
 .rootkeys              |   27 +
 mini-os/Makefile       |   40 +
 mini-os/README         |   35 +
 mini-os/entry.S        |  348 +++
 mini-os/events.c       |  106 +
 mini-os/h/events.h     |   53 +
 mini-os/h/hypervisor.h |  266 +++
 mini-os/h/lib.h        |  129 ++
 mini-os/h/list.h       |  164 ++
 mini-os/h/mm.h         |  107 +
 mini-os/h/os.h         |  270 +++
 mini-os/h/time.h       |   59 +
 mini-os/h/types.h      |   41 +
 mini-os/head.S         |   46 +
 mini-os/hypervisor.c   |   83 +
 mini-os/kernel.c       |  115 +
 mini-os/lib/malloc.c   | 5700 ++++++++++++++++++++++++++++++++++++++++++++++++
 mini-os/lib/math.c     |  385 ++++
 mini-os/lib/printf.c   |  470 ++++
 mini-os/lib/string.c   |  142 ++
 mini-os/mm.c           |  375 ++++
 mini-os/time.c         |  149 ++
 mini-os/traps.c        |  150 ++
 mini-os/vmlinux.lds    |   82 +
 26 files changed, 9357 insertions(+)
 create mode 100644 mini-os/Makefile
 create mode 100644 mini-os/README
 create mode 100644 mini-os/entry.S
 create mode 100644 mini-os/events.c
 create mode 100644 mini-os/h/events.h
 create mode 100644 mini-os/h/hypervisor.h
 create mode 100644 mini-os/h/lib.h
 create mode 100644 mini-os/h/list.h
 create mode 100644 mini-os/h/mm.h
 create mode 100644 mini-os/h/os.h
 create mode 100644 mini-os/h/time.h
 create mode 100644 mini-os/h/types.h
 create mode 100644 mini-os/head.S
 create mode 100644 mini-os/hypervisor.c
 create mode 100644 mini-os/kernel.c
 create mode 100644 mini-os/lib/malloc.c
 create mode 100644 mini-os/lib/math.c
 create mode 100644 mini-os/lib/printf.c
 create mode 100644 mini-os/lib/string.c
 create mode 100644 mini-os/mm.c
 create mode 100644 mini-os/time.c
 create mode 100644 mini-os/traps.c
 create mode 100644 mini-os/vmlinux.lds

diff --git a/.bk-to-hg b/.bk-to-hg
index a41913bd35..f76cff0b51 100755
--- a/.bk-to-hg
+++ b/.bk-to-hg
@@ -1,5 +1,13 @@
 #!/bin/sh -x
 set -e
+test -L mini-os/h/hypervisor-ifs/block.h
+rm      mini-os/h/hypervisor-ifs/block.h
+test -L mini-os/h/hypervisor-ifs/hypervisor-if.h
+rm      mini-os/h/hypervisor-ifs/hypervisor-if.h
+test -L mini-os/h/hypervisor-ifs/kbd.h
+rm      mini-os/h/hypervisor-ifs/kbd.h
+test -L mini-os/h/hypervisor-ifs/network.h
+rm      mini-os/h/hypervisor-ifs/network.h
 test -L xenolinux-sparse
 rm      xenolinux-sparse
 (find -depth -type d -print | xargs -r rmdir 2>/dev/null) || true
diff --git a/.hg-to-bk b/.hg-to-bk
index f74c6a785b..714544304a 100755
--- a/.hg-to-bk
+++ b/.hg-to-bk
@@ -1,5 +1,12 @@
 #!/bin/sh -x
 set -e
+mkdir -p mini-os
+mkdir -p mini-os/h
+mkdir -p mini-os/h/hypervisor-ifs
+ln -s ../../../xen/include/hypervisor-ifs/block.h mini-os/h/hypervisor-ifs/block.h
+ln -s ../../../xen/include/hypervisor-ifs/hypervisor-if.h mini-os/h/hypervisor-ifs/hypervisor-if.h
+ln -s ../../../xen/include/hypervisor-ifs/kbd.h mini-os/h/hypervisor-ifs/kbd.h
+ln -s ../../../xen/include/hypervisor-ifs/network.h mini-os/h/hypervisor-ifs/network.h
 ln -s xenolinux-2.4.22-sparse xenolinux-sparse
 (find -depth -type d -print | xargs -r rmdir 2>/dev/null) || true
 exit 0
diff --git a/.rootkeys b/.rootkeys
index 7f3ce574b9..1e7a841a52 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -6,6 +6,33 @@
 3f5ef5a24IaQasQE2tyMxrfxskMmvw README
 3f5ef5a2l4kfBYSQTUaOyyD76WROZQ README.CD
 3f69d8abYB1vMyD_QVDvzxy5Zscf1A TODO
+3f815144d1vI2777JI-dO4wk49Iw7g mini-os/Makefile
+3f815144zTnCV5591ulIJQrpe5b-5Q mini-os/README
+3f815144wiiDekmfMl9LIPIvhR83Uw mini-os/entry.S
+3f815144r7AHj8GPvc3Nl1L9OSsWIg mini-os/events.c
+3f815144h-Chna6E38yo40jqU95G1Q mini-os/h/events.h
+3f815144oqr2OlUDzE2GfkKX5Hcxqg mini-os/h/hypervisor-ifs/block.h
+3f8151443nGXvfUTFG67VXOIH8P4lg mini-os/h/hypervisor-ifs/hypervisor-if.h
+3f81514417ZlYqiRdM_AHPy7G11htA mini-os/h/hypervisor-ifs/kbd.h
+3f815144J3ZfU5am03Td7Wjfrz30qQ mini-os/h/hypervisor-ifs/network.h
+3f8151445bYdgThGHQPeOW49PsrJ_A mini-os/h/hypervisor.h
+3f815144f2Vg3qb6tiwt2VZad-DWsg mini-os/h/lib.h
+3f815144iqXtdYup_pyfPSmDZuvZcg mini-os/h/list.h
+3f81514437EzzRWAnZl4_Ej1oznMjg mini-os/h/mm.h
+3f815144nbSjjT1h4m99-QPbeSWY0Q mini-os/h/os.h
+3f815144L1t0AevJt2JDXPegv6JTrw mini-os/h/time.h
+3f815144UxddtL0ICCKisN-NDHNFaA mini-os/h/types.h
+3f815145W2mamPMclRLOzm5B38vWUQ mini-os/head.S
+3f815145LqcH11TCEZbAvcjarckkJw mini-os/hypervisor.c
+3f815145vwnmxhCwN7dMRWv_XFtXbg mini-os/kernel.c
+3f8151451k5emQAlRe80JdIvfSN4VA mini-os/lib/malloc.c
+3f815145Mb9WSKjOPsYTLsPIvPyy4Q mini-os/lib/math.c
+3f8151454rEuPjN74V2Bcu65RLnM-Q mini-os/lib/printf.c
+3f815145MQZrUJV0iRmTK2KIhwB2wg mini-os/lib/string.c
+3f815145CB8XdPUqsmhAjSDFuwOoqA mini-os/mm.c
+3f815145vGYx1WY79voKkZB9yKwJKQ mini-os/time.c
+3f815145xlKBAQmal9oces3G_Mvxqw mini-os/traps.c
+3f815145AYE58Kpmsj5U7oHDpVDZJA mini-os/vmlinux.lds
 3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
 3e6377b24eQqYMsDi9XrFkIgTzZ47A tools/balloon/Makefile
 3e6377d6eiFjF1hHIS6JEIOFk62xSA tools/balloon/README
diff --git a/mini-os/Makefile b/mini-os/Makefile
new file mode 100644
index 0000000000..251ee273ba
--- /dev/null
+++ b/mini-os/Makefile
@@ -0,0 +1,40 @@
+
+CC := gcc
+LD := ld
+# Linker should relocate monitor to this address
+MONITOR_BASE := 0xE0100000
+CFLAGS  := -fno-builtin -O3 -Wall -Ih/
+
+# Final guest image: 12-byte Xen guest header followed by the raw kernel.
+TARGET := image.final
+
+LOBJS := lib/malloc.o lib/math.o lib/printf.o lib/string.o
+OBJS := entry.o kernel.o traps.o hypervisor.o mm.o events.o time.o $(LOBJS)
+
+HINTF := h/hypervisor-ifs/hypervisor-if.h
+HDRS :=  h/os.h h/types.h h/hypervisor.h h/mm.h h/events.h h/time.h h/lib.h $(HINTF)
+
+.PHONY: default clean
+.DELETE_ON_ERROR:   # image.final is built by appends; never keep a partial one
+default: $(TARGET)
+
+$(TARGET): head.o $(OBJS) vmlinux.lds
+	# Image will load at 0xC0000000. First bytes from head.o
+	$(LD) -N -T vmlinux.lds head.o $(OBJS) -o image.elf
+	# Guest OS header -- first 8 bytes are identifier 'XenoGues' (printf: portable, unlike echo -e -n).
+	printf 'XenoGues' >$@
+	# Guest OS header -- next 4 bytes are load address (0xC0000000), little-endian.
+	printf '\000\000\000\300' >>$@
+	# Flatten the ELF image to a raw bag of bytes and append it right after the header.
+	objcopy -O binary -R .note -R .comment image.elf image.raw
+	cat image.raw >>$@
+
+clean:
+	rm -f *.o lib/*.o *~ core image.elf image.raw image.final image.final.gz
+
+%.o: %.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -c $< -o $@
+
+%.o: %.S $(HDRS) Makefile
+	$(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
diff --git a/mini-os/README b/mini-os/README
new file mode 100644
index 0000000000..98661a9274
--- /dev/null
+++ b/mini-os/README
@@ -0,0 +1,35 @@
+ Minimal OS
+ ----------
+
+This shows some of the stuff that any guest OS will have to set up.
+
+This includes:
+
+ * installing a virtual exception table
+ * handling virtual exceptions
+ * handling asynchronous events
+ * enabling/disabling async events
+ * parsing start_info struct at start-of-day
+ * registering virtual interrupt handlers (for timer interrupts)
+ * a simple page and memory allocator
+ * minimal libc support
+
+Stuff it doesn't show:
+ 
+ * modifying page tables
+ * network code
+ * block-device code
+
+
+- to build it just type make.
+
+- copy image.final somewhere where dom0 can access it
+
+- in dom0
+  # xi_create 16000 test
+    <domid>
+  # xi_build <domid> image.final 0
+  # xi_start <domid>
+
+This prints out a bunch of stuff and then prints the system time once every
+1000 timer interrupts.
diff --git a/mini-os/entry.S b/mini-os/entry.S
new file mode 100644
index 0000000000..a5a86543b0
--- /dev/null
+++ b/mini-os/entry.S
@@ -0,0 +1,348 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Adjusted for XenoLinux use by K A Fraser
+ *  Adjusted for Xen minimal os by R Neugebauer
+ */
+
+        
+#include <os.h>
+        
+EBX		= 0x00
+ECX		= 0x04
+EDX		= 0x08
+ESI		= 0x0C
+EDI		= 0x10
+EBP		= 0x14
+EAX		= 0x18
+DS		= 0x1C
+ES		= 0x20
+ORIG_EAX	= 0x24
+EIP		= 0x28
+CS		= 0x2C
+EFLAGS		= 0x30
+OLDESP		= 0x34
+OLDSS		= 0x38
+
+CF_MASK		= 0x00000001
+IF_MASK		= 0x00000200
+NT_MASK		= 0x00004000
+
+/* Declare a globally-visible label */
+#define ENTRY(X) .globl X ; X :
+
+/* A Linux hangover. Just ignore it. */
+#define SYMBOL_NAME(X) X
+        
+#define SAVE_ALL \
+	cld; \
+	pushl %es; \
+	pushl %ds; \
+	pushl %eax; \
+	pushl %ebp; \
+	pushl %edi; \
+	pushl %esi; \
+	pushl %edx; \
+	pushl %ecx; \
+	pushl %ebx; \
+	movl $(__KERNEL_DS),%edx; \
+	movl %edx,%ds; \
+	movl %edx,%es;
+
+#define RESTORE_ALL	\
+	popl %ebx;	\
+	popl %ecx;	\
+	popl %edx;	\
+	popl %esi;	\
+	popl %edi;	\
+	popl %ebp;	\
+	popl %eax;	\
+1:	popl %ds;	\
+2:	popl %es;	\
+	addl $4,%esp;	\
+3:	iret;		\
+.section .fixup,"ax";	\
+4:	movl $0,(%esp);	\
+	jmp 1b;		\
+5:	movl $0,(%esp);	\
+	jmp 2b;		\
+6:	pushl %ss;	\
+	popl %ds;	\
+	pushl %ss;	\
+	popl %es;	\
+	pushl $11;	\
+        call do_exit;	\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,4b;	\
+	.long 2b,5b;	\
+	.long 3b,6b;	\
+.previous
+
+ENTRY(divide_error)
+	pushl $0		# no error code
+	pushl $ SYMBOL_NAME(do_divide_error)
+	.align 4
+error_code:
+	pushl %ds
+	pushl %eax
+	xorl %eax,%eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	decl %eax			# eax = -1
+	pushl %ecx
+	pushl %ebx
+	cld
+	movl %es,%ecx
+	movl ORIG_EAX(%esp), %esi	# get the error code
+	movl ES(%esp), %edi		# get the function address
+	movl %eax, ORIG_EAX(%esp)
+	movl %ecx, ES(%esp)
+	movl %esp,%edx
+	pushl %esi			# push the error code
+	pushl %edx			# push the pt_regs pointer
+	movl $(__KERNEL_DS),%edx
+	movl %edx,%ds
+	movl %edx,%es
+	call *%edi
+	addl $8,%esp
+
+# These are the tests Linux makes before exiting the OS back to userland.
+# At this point preemption may occur, or signals may get delivered.
+ret_to_user_tests:
+#        cmpl $0,need_resched(%ebx)
+#        jne reschedule
+#        cmpl $0,sigpending(%ebx)
+#        je   safesti
+        jmp safesti
+               
+        
+ret_from_exception:
+        movb CS(%esp),%cl
+	    test $2,%cl          # slow return to ring 2 or 3
+	    jne  ret_to_user_tests
+        RESTORE_ALL
+
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until weve done all processing. HOWEVER, we must enable events before
+# popping the stack frame (cant be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so wed
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+        pushl %eax
+        SAVE_ALL
+        movl EIP(%esp),%eax
+        cmpl $scrit,%eax
+        jb   11f
+        cmpl $ecrit,%eax
+        jb   critical_region_fixup
+11:     push %esp
+        call do_hypervisor_callback
+        add  $4,%esp
+        movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
+        xorl %eax,%eax
+        movb CS(%esp),%cl
+    	test $2,%cl          # slow return to ring 2 or 3
+	    jne  ret_to_user_tests
+safesti:btsl $31,4(%esi)     # reenable event callbacks
+scrit:  /**** START OF CRITICAL REGION ****/
+        cmpl %eax,(%esi)
+        jne  14f              # process more events if necessary...
+        RESTORE_ALL
+14:     btrl %eax,4(%esi)
+        jmp  11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame. 
+critical_region_fixup:
+        addl $critical_fixup_table-scrit,%eax  # %eax = &critical_fixup_table[EIP - scrit]
+        movzbl (%eax),%eax    # %eax contains num bytes popped
+        mov  %esp,%esi
+        add  %eax,%esi        # %esi points at end of src region
+        mov  %esp,%edi
+        add  $0x34,%edi       # %edi points at end of dst region
+        mov  %eax,%ecx        # %ecx = number of bytes to copy
+        shr  $2,%ecx          # convert bytes to words (4 bytes per copy step)
+        je   16f              # skip loop if nothing to copy
+15:     subl $4,%esi          # pre-decrementing copy loop
+        subl $4,%edi
+        movl (%esi),%eax
+        movl %eax,(%edi)
+        loop 15b              # decrement %ecx, repeat until it reaches zero
+16:     movl %edi,%esp        # final %edi is top of merged stack
+        jmp  11b              # re-enter the handler body on the merged frame
+         
+critical_fixup_table:        
+        .byte 0x00,0x00                       # cmpl %eax,(%esi)
+        .byte 0x00,0x00                       # jne  14f
+        .byte 0x00                            # pop  %ebx
+        .byte 0x04                            # pop  %ecx
+        .byte 0x08                            # pop  %edx
+        .byte 0x0c                            # pop  %esi
+        .byte 0x10                            # pop  %edi
+        .byte 0x14                            # pop  %ebp
+        .byte 0x18                            # pop  %eax
+        .byte 0x1c                            # pop  %ds
+        .byte 0x20                            # pop  %es
+        .byte 0x24,0x24,0x24                  # add  $4,%esp
+        .byte 0x28                            # iret
+        .byte 0x00,0x00,0x00,0x00,0x00        # btrl $31,4(%esi)
+        .byte 0x00,0x00                       # jmp  11b
+       
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+1:      pop  %ds
+2:      pop  %es
+3:      pop  %fs
+4:      pop  %gs
+5:      iret
+.section .fixup,"ax";	\
+6:	movl $0,(%esp);	\
+	jmp 1b;		\
+7:	movl $0,(%esp);	\
+	jmp 2b;		\
+8:	movl $0,(%esp);	\
+	jmp 3b;		\
+9:	movl $0,(%esp);	\
+	jmp 4b;		\
+10:	pushl %ss;	\
+	popl %ds;	\
+	pushl %ss;	\
+	popl %es;	\
+	pushl $11;	\
+	call do_exit;	\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,6b;	\
+	.long 2b,7b;	\
+	.long 3b,8b;	\
+	.long 4b,9b;	\
+	.long 5b,10b;	\
+.previous
+                
+ENTRY(coprocessor_error)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_coprocessor_error)
+	jmp error_code
+
+ENTRY(simd_coprocessor_error)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+	jmp error_code
+
+ENTRY(device_not_available)
+	pushl $-1		# mark this as an int
+	SAVE_ALL
+	#call SYMBOL_NAME(math_state_restore)
+	jmp ret_from_exception
+
+ENTRY(debug)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_debug)
+	jmp error_code
+
+ENTRY(int3)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_int3)
+	jmp error_code
+
+ENTRY(overflow)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_overflow)
+	jmp error_code
+
+ENTRY(bounds)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_bounds)
+	jmp error_code
+
+ENTRY(invalid_op)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_invalid_op)
+	jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+	jmp error_code
+
+ENTRY(double_fault)
+	pushl $ SYMBOL_NAME(do_double_fault)
+	jmp error_code
+
+ENTRY(invalid_TSS)
+	pushl $ SYMBOL_NAME(do_invalid_TSS)
+	jmp error_code
+
+ENTRY(segment_not_present)
+	pushl $ SYMBOL_NAME(do_segment_not_present)
+	jmp error_code
+
+ENTRY(stack_segment)
+	pushl $ SYMBOL_NAME(do_stack_segment)
+	jmp error_code
+
+ENTRY(general_protection)
+	pushl $ SYMBOL_NAME(do_general_protection)
+	jmp error_code
+
+ENTRY(alignment_check)
+	pushl $ SYMBOL_NAME(do_alignment_check)
+	jmp error_code
+
+# This handler is special, because it gets an extra value on its stack,
+# which is the linear faulting address.
+ENTRY(page_fault)
+	pushl %ds
+	pushl %eax
+	xorl %eax,%eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	decl %eax			# eax = -1
+	pushl %ecx
+	pushl %ebx
+	cld
+	movl %es,%ecx
+	movl ORIG_EAX(%esp), %esi	# get the error code
+	movl ES(%esp), %edi		# get the faulting address
+	movl %eax, ORIG_EAX(%esp)
+	movl %ecx, ES(%esp)
+	movl %esp,%edx
+        pushl %edi                      # push the faulting address
+	pushl %esi			# push the error code
+	pushl %edx			# push the pt_regs pointer
+	movl $(__KERNEL_DS),%edx
+	movl %edx,%ds
+	movl %edx,%es
+	call SYMBOL_NAME(do_page_fault)
+	addl $12,%esp
+	jmp ret_from_exception
+
+ENTRY(machine_check)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_machine_check)
+	jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+	pushl $0
+	pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+	jmp error_code
diff --git a/mini-os/events.c b/mini-os/events.c
new file mode 100644
index 0000000000..a2083afa37
--- /dev/null
+++ b/mini-os/events.c
@@ -0,0 +1,106 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: events.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Jul 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Deal with events
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <os.h>
+#include <hypervisor.h>
+#include <events.h>
+#include <lib.h>
+
+static ev_action_t ev_actions[NR_EVS];
+void default_handler(int ev, struct pt_regs *regs);
+
+
+/*
+ * demux event ev to its handler; returns 0 if ev >= NR_EVS, otherwise 1
+ */
+asmlinkage unsigned int do_event(int ev, struct pt_regs *regs)
+{
+    ev_action_t  *action;
+
+    if (ev >= NR_EVS) {
+        printk("Large event number %d\n", ev);
+        return 0;
+    }
+
+    action = &ev_actions[ev];
+    action->count++;              /* counted even if no handler ends up running */
+    ack_hypervisor_event(ev);     /* acknowledged before the handler is invoked */
+
+    if (!action->handler)
+        goto out;
+    
+    if (action->status & EVS_DISABLED)
+        goto out;
+    
+    /* call the handler */
+    action->handler(ev, regs);
+    
+ out:
+    return 1;                     /* in-range event, whether or not a handler ran */
+
+}
+
+/* add a handler; returns 1 on success, 0 if ev is invalid or already taken */
+unsigned int add_ev_action( int ev, void (*handler)(int, struct pt_regs *) )
+{
+    if ((unsigned int)ev >= NR_EVS) {   /* unsigned compare also rejects ev < 0 */
+        printk ("add_ev_action: invalid event %d\n", ev);
+        return 0;
+    } else if (ev_actions[ev].handler) {   /* chaining is not supported */
+        printk ("event[%d] already handled by %p\n", ev, ev_actions[ev].handler);
+        return 0;
+    }
+    ev_actions[ev].handler = handler;
+    return 1;
+}
+
+unsigned int enable_ev_action( int ev )   /* returns 1 if enabled, else 0 */
+{
+    if ((unsigned int)ev >= NR_EVS || !ev_actions[ev].handler) { /* bad ev or no handler */
+        printk ("enable event[%d], no handler installed\n", ev);
+        return 0;
+    }
+    ev_actions[ev].status &= ~EVS_DISABLED;
+    return 1;
+}
+
+unsigned int disable_ev_action( int ev )   /* returns 1 if disabled, 0 if ev is out of range */
+{
+    if ((unsigned int)ev < NR_EVS) ev_actions[ev].status |= EVS_DISABLED;
+    return (unsigned int)ev < NR_EVS;   /* never index past ev_actions[] */
+}
+
+/*
+ * initially all events are without a handler and disabled
+ */
+void init_events(void)
+{
+    int i;
+
+    /* initialise every event slot: no handler installed, delivery disabled */
+    for ( i = 0; i < NR_EVS; i++ )
+    {
+        ev_actions[i].status  = EVS_DISABLED;
+        ev_actions[i].handler = NULL;
+    }
+}
+
+void default_handler(int ev, struct pt_regs *regs) { /* only logs the event number; regs is unused */
+    printk("X[%d] ", ev);
+}
diff --git a/mini-os/h/events.h b/mini-os/h/events.h
new file mode 100644
index 0000000000..5166876b76
--- /dev/null
+++ b/mini-os/h/events.h
@@ -0,0 +1,53 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: events.h
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Jul 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: deal with events
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _EVENTS_H_
+#define _EVENTS_H_
+
+/* _EVENT_* are defined in hypervisor-if.h  */
+#define EV_BLKDEV _EVENT_BLKDEV
+#define EV_TIMER  _EVENT_TIMER
+#define EV_DIE    _EVENT_DIE
+#define EV_DEBUG  _EVENT_DEBUG
+#define EV_NET    _EVENT_NET
+#define EV_PS2    _EVENT_PS2
+
+#define NR_EVS (sizeof(HYPERVISOR_shared_info->events) * 8)
+
+/* ev handler status */
+#define EVS_INPROGRESS	1	/* Event handler active - do not enter! */
+#define EVS_DISABLED	2	/* Event disabled - do not enter! */
+#define EVS_PENDING	    4	/* Event pending - replay on enable */
+#define EVS_REPLAY	    8	/* Event has been replayed but not acked yet */
+
+/* this represents an event handler. Chaining or sharing is not allowed */
+typedef struct _ev_action_t {
+	void (*handler)(int, struct pt_regs *);
+    unsigned int status;		/* EVS_* status flags */
+    u32 count;              /* incremented by do_event() on every in-range event */
+} ev_action_t;
+
+/* prototypes */
+unsigned int do_event(int ev, struct pt_regs *regs);
+unsigned int add_ev_action( int ev, void (*handler)(int, struct pt_regs *) );
+unsigned int enable_ev_action( int ev );
+unsigned int disable_ev_action( int ev );
+void init_events(void);
+
+#endif /* _EVENTS_H_ */
diff --git a/mini-os/h/hypervisor.h b/mini-os/h/hypervisor.h
new file mode 100644
index 0000000000..c0d340f339
--- /dev/null
+++ b/mini-os/h/hypervisor.h
@@ -0,0 +1,266 @@
+
+/******************************************************************************
+ * hypervisor.h
+ * 
+ * Linux-specific hypervisor handling.
+ * 
+ * Adjusted by R Neugebauer for Xen minimal OS
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#ifndef _HYPERVISOR_H_
+#define _HYPERVISOR_H_
+
+#include <types.h>
+
+/* include the hypervisor interface */
+#include <hypervisor-ifs/network.h>
+#include <hypervisor-ifs/block.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+
+/*
+ * a placeholder for the start of day information passed up from the hypervisor
+ */
+union start_info_union
+{
+    start_info_t start_info;
+    char padding[512];
+};
+extern union start_info_union start_info_union;
+#define start_info (start_info_union.start_info)
+
+
+/* hypervisor.c */
+void do_hypervisor_callback(struct pt_regs *regs);
+void enable_hypervisor_event(unsigned int ev);
+void disable_hypervisor_event(unsigned int ev);
+void ack_hypervisor_event(unsigned int ev);
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
+
+static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
+{
+    int ret;
+    /* "memory" clobber: only the pointer is an operand, so *table must be flushed */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
+        "b" (table) : "memory" );
+    return ret;
+}
+
+static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count)
+{
+    int ret;
+    /* "memory" clobber: only the pointer is an operand, so req[] must be flushed */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_pt_update),
+        "b" (req), "c" (count) : "memory" );
+    return ret;
+}
+
+static inline int HYPERVISOR_console_write(const char *str, int count)
+{
+    int ret;
+    /* "memory" clobber: only the pointer is an operand, so gcc must be told
+     * that the bytes at str are accessed and have to be in memory first. */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_console_write),
+        "b" (str), "c" (count) : "memory" );
+    return ret;
+}
+
+static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
+{
+    int ret;
+    /* "memory" clobber: only the pointer is an operand, so gcc must be told
+     * that frame_list[] is accessed and has to be in memory first. */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_gdt),
+        "b" (frame_list), "c" (entries) : "memory" );
+    return ret;
+}
+
+static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_stack_switch),
+        "b" (ss), "c" (esp) : "memory" );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_callbacks(
+    unsigned long event_selector, unsigned long event_address,
+    unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
+        "b" (event_selector), "c" (event_address), 
+        "d" (failsafe_selector), "S" (failsafe_address) : "memory" );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_net_update(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_net_update) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_fpu_taskswitch(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_yield(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_yield) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_exit(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_exit) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_dom0_op(void *dom0_op)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
+        "b" (dom0_op) : "memory" );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_network_op(void *network_op)
+{
+    int ret;
+    /* "memory" clobber: only the pointer is an operand (as in HYPERVISOR_dom0_op) */
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_network_op),
+        "b" (network_op) : "memory" );
+    return ret;
+}
+
+static inline int HYPERVISOR_block_io_op(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_block_io_op) ); 
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg),
+        "b" (reg), "c" (value) );
+
+    return ret;
+}
+
+static inline unsigned long HYPERVISOR_get_debugreg(int reg)
+{
+    unsigned long ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg),
+        "b" (reg) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_update_descriptor(
+    unsigned long pa, unsigned long word1, unsigned long word2)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), 
+        "b" (pa), "c" (word1), "d" (word2) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_fast_trap(int idx)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap), 
+        "b" (idx) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_dom_mem_op(void *dom_mem_op)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op),
+        "b" (dom_mem_op) : "memory" );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_multicall(void *call_list, int nr_calls)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_multicall),
+        "b" (call_list), "c" (nr_calls) : "memory" );
+
+    return ret;
+}
+
+static inline long HYPERVISOR_kbd_op(unsigned char op, unsigned char val)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_kbd_op),
+        "b" (op), "c" (val) );
+
+    return ret;
+}
+
+#endif /* _HYPERVISOR_H_ */
diff --git a/mini-os/h/lib.h b/mini-os/h/lib.h
new file mode 100644
index 0000000000..48140ab79d
--- /dev/null
+++ b/mini-os/h/lib.h
@@ -0,0 +1,129 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: lib.h
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Random useful library functions, contains some freebsd stuff
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ *
+ *-
+ * Copyright (c) 1991, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)stdarg.h    8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/i386/include/stdarg.h,v 1.10 1999/08/28 00:44:26 peter Exp $
+ */
+
+#ifndef _LIB_H_
+#define _LIB_H_
+
+
+/* variadic function support */
+typedef char *va_list;
+#define __va_size(type) \
+        (((sizeof(type) + sizeof(int) - 1) / sizeof(int)) * sizeof(int))
+#ifdef __GNUC__
+#define va_start(ap, last) \
+        ((ap) = (va_list)__builtin_next_arg(last))
+#else
+#define va_start(ap, last) \
+        ((ap) = (va_list)&(last) + __va_size(last))
+#endif
+#define va_arg(ap, type) \
+        (*(type *)((ap) += __va_size(type), (ap) - __va_size(type)))
+#define va_end(ap)
+
+
+/* printing */
+#define printk  printf
+#define kprintf printf
+int printf(const char *fmt, ...);
+int vprintf(const char *fmt, va_list ap);
+int sprintf(char *buf, const char *cfmt, ...);
+int vsprintf(char *buf, const char *cfmt, va_list ap);
+
+/* string and memory manipulation */
+int    memcmp(const void *cs, const void *ct, size_t count);
+void  *memcpy(void *dest, const void *src, size_t count);
+int    strncmp(const char *cs, const char *ct, size_t count);
+int    strcmp(const char *cs, const char *ct);
+char  *strcpy(char *dest, const char *src);
+char  *strncpy(char *dest, const char *src, size_t count);
+void  *memset(void *s,int c, size_t count);
+size_t strnlen(const char *s, size_t count);
+size_t strlen(const char *s);
+char  *strchr(const char *s, int c);
+char  *strstr(const char *s1, const char *s2);
+
+
+/* dlmalloc functions */
+struct mallinfo {
+  int arena;    /* non-mmapped space allocated from system */
+  int ordblks;  /* number of free chunks */
+  int smblks;   /* number of fastbin blocks */
+  int hblks;    /* number of mmapped regions */
+  int hblkhd;   /* space in mmapped regions */
+  int usmblks;  /* maximum total allocated space */
+  int fsmblks;  /* space available in freed fastbin blocks */
+  int uordblks; /* total allocated space */
+  int fordblks; /* total free space */
+  int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+void *malloc(size_t n);
+void *calloc(size_t n_elements, size_t element_size);
+void  free(void* p);
+void *realloc(void* p, size_t n);
+void *memalign(size_t alignment, size_t n);
+void *valloc(size_t n);
+struct mallinfo mallinfo();
+int  mallopt(int parameter_number, int parameter_value);
+
+void **independent_calloc(size_t n_elements, size_t size, void* chunks[]);
+void **independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+void *pvalloc(size_t n);
+void cfree(void* p);
+int malloc_trim(size_t pad);
+size_t malloc_usable_size(void* p);
+void malloc_stats();
+
+
+#endif /* _LIB_H_ */
diff --git a/mini-os/h/list.h b/mini-os/h/list.h
new file mode 100644
index 0000000000..eec102405c
--- /dev/null
+++ b/mini-os/h/list.h
@@ -0,0 +1,164 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+#define ASSERT(x) ((void)0)
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+	(ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries. 
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add(struct list_head * new,
+	struct list_head * prev, struct list_head * next)
+{
+	/* Hook 'new' up to its successor first, then to its predecessor. */
+	new->next = next;
+	next->prev = new;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/**
+ * list_add - add a new entry right after the head
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);	/* between head and its old successor */
+}
+
+/**
+ * list_add_tail - add a new entry right before the head
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);	/* between the old last entry and head */
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_del(struct list_head * prev,
+				  struct list_head * next)
+{
+	prev->next = next;	/* forward link now skips the removed span */
+	next->prev = prev;	/* and so does the backward link */
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev, *after = entry->next;
+	ASSERT(after->prev == entry && before->next == entry);	/* no-op here */
+	__list_del(before, after);	/* entry's own links are left stale */
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);	/* unlink from the neighbours */
+	entry->next = entry->prev = entry;	/* then make it an empty list */
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test; only read, so const lists are accepted too.
+ */
+static __inline__ int list_empty(const struct list_head *head)
+{
+	return head->next == head;	/* an empty head points back at itself */
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+	struct list_head *first = list->next, *last = list->prev, *at;
+
+	/* Nothing to do for an empty source list. */
+	if (first == list)
+		return;
+
+	/* Link first..last in between 'head' and its old successor 'at'. */
+	at = head->next;
+	first->prev = head;
+	head->next = first;
+	last->next = at;
+	at->prev = last;
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop counter.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+        	
+/**
+ * list_for_each_safe	-	iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop counter.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+#endif
diff --git a/mini-os/h/mm.h b/mini-os/h/mm.h
new file mode 100644
index 0000000000..9cc2271f91
--- /dev/null
+++ b/mini-os/h/mm.h
@@ -0,0 +1,107 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: mm.h
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: 
+ * Description: 
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _MM_H_
+#define _MM_H_
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT      12
+#define PAGE_SIZE       (1UL << PAGE_SHIFT)
+#define PAGE_MASK       (~(PAGE_SIZE-1))
+
+
+#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+
+extern unsigned long *phys_to_machine_mapping;
+#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
+#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
+static inline unsigned long phys_to_machine(unsigned long phys)
+{
+    /* Look the frame number up in the p2m table; the page offset is kept. */
+    unsigned long mfn = pfn_to_mfn(phys >> PAGE_SHIFT);
+    return (mfn << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+}
+static inline unsigned long machine_to_phys(unsigned long machine)
+{
+    /* Look the frame number up in the m2p table; the page offset is kept. */
+    unsigned long pfn = mfn_to_pfn(machine >> PAGE_SHIFT);
+    return (pfn << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+}
+
+/* VIRT <-> MACHINE conversion */
+#define virt_to_machine(_a) (phys_to_machine(__pa(_a)))
+#define machine_to_virt(_m) (__va(machine_to_phys(_m)))
+
+/*
+ * This handles the memory map.. We could make this a config
+ * option, but too many people screw it up, and too few need
+ * it.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB. 
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+
+#define __PAGE_OFFSET           (0xC0000000)
+
+#define PAGE_OFFSET             ((unsigned long)__PAGE_OFFSET)
+#define __pa(x)                 ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x)                 ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define virt_to_page(kaddr)     (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page)        ((page - mem_map) < max_mapnr)
+
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_EXEC | \
+                                 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+
+/* prototypes: page allocator entry points plus Linux-style shorthands */
+void init_mm(void);  /* (void): a prototype, so stray arguments are rejected */
+void release_bytes_to_allocator(unsigned long min, unsigned long max);
+unsigned long __get_free_pages(int order);
+void __free_pages(unsigned long p, int order);
+#define get_free_pages(_o) (__get_free_pages(_o))
+#define get_free_page() (__get_free_pages(0))
+#define free_pages(_p,_o) (__free_pages(_p,_o))
+#define free_page(_p) (__free_pages(_p,0))
+
+static __inline__ int get_order(unsigned long size)
+{
+    /* For size >= 1: smallest order with (PAGE_SIZE << order) >= size. */
+    unsigned long top = (size-1) >> PAGE_SHIFT;
+    int order;
+
+    /* The order is the number of significant bits in the last page index. */
+    for (order = 0; top; order++)
+        top >>= 1;
+
+    return order;
+}
+
+
+#endif /* _MM_H_ */
diff --git a/mini-os/h/os.h b/mini-os/h/os.h
new file mode 100644
index 0000000000..2645bea954
--- /dev/null
+++ b/mini-os/h/os.h
@@ -0,0 +1,270 @@
+/******************************************************************************
+ * os.h
+ * 
+ * random collection of macros and definition
+ */
+
+#ifndef _OS_H_
+#define _OS_H_
+
+
+#define NULL 0
+
+/*
+ * These are the segment descriptors provided for us by the hypervisor.
+ * For now, these are hardwired -- guest OSes cannot update the GDT
+ * or LDT.
+ * 
+ * It shouldn't be hard to support descriptor-table frobbing -- let me 
+ * know if the BSD or XP ports require flexibility here.
+ */
+
+
+/*
+ * these are also defined in hypervisor-if.h but can't be pulled in as
+ * they are used in start of day assembly. Need to clean up the .h files
+ * a bit more...
+ */
+
+#ifndef FLAT_RING1_CS
+#define FLAT_RING1_CS		0x0819
+#define FLAT_RING1_DS		0x0821
+#define FLAT_RING3_CS		0x082b
+#define FLAT_RING3_DS		0x0833
+#endif
+
+#define __KERNEL_CS        FLAT_RING1_CS
+#define __KERNEL_DS        FLAT_RING1_DS
+
+/* Everything below this point is not included by assembler (.S) files. */
+#ifndef __ASSEMBLY__
+
+#include <types.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+
+/* Register frame as stored on the stack during an exception or interrupt.
+   NOTE(review): field order presumably mirrors the pushes in entry.S. */
+struct pt_regs {
+	long ebx;
+	long ecx;
+	long edx;
+	long esi;
+	long edi;
+	long ebp;
+	long eax;
+	int  xds;
+	int  xes;
+	long orig_eax;
+	long eip;
+	int  xcs;
+	long eflags;
+	long esp;
+	int  xss;
+};
+
+
+/*
+ * STI/CLI equivalents. These set and clear the virtual event-enable flag,
+ * EVENTS_MASTER_ENABLE_BIT in the shared_info events_mask. When the bit
+ * gets set there may already be pending events, so __sti() and
+ * __restore_flags() call do_hypervisor_callback() directly in that case.
+ */
+#define unlikely(x)  __builtin_expect((x),0)
+#define __save_flags(x)                                                       \
+do {                                                                          \
+    (x) = test_bit(EVENTS_MASTER_ENABLE_BIT,                                  \
+                   &HYPERVISOR_shared_info->events_mask);                     \
+    barrier();                                                                \
+} while (0)
+
+#define __restore_flags(x)                                                    \
+do {                                                                          \
+    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
+    if (x) set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask);          \
+    barrier();                                                                \
+    if ( unlikely(_shared->events) && (x) ) do_hypervisor_callback(NULL);     \
+} while (0)
+
+#define __cli()                                                               \
+do {                                                                          \
+    clear_bit(EVENTS_MASTER_ENABLE_BIT, &HYPERVISOR_shared_info->events_mask);\
+    barrier();                                                                \
+} while (0)
+
+#define __sti()                                                               \
+do {                                                                          \
+    shared_info_t *_shared = HYPERVISOR_shared_info;                          \
+    set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask);                 \
+    barrier();                                                                \
+    if ( unlikely(_shared->events) ) do_hypervisor_callback(NULL);            \
+} while (0)
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+#define save_and_cli(x) __save_and_cli(x)
+#define save_and_sti(x) __save_and_sti(x)
+
+
+
+/* This is a barrier for the compiler only, NOT the processor! */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define LOCK_PREFIX ""
+#define LOCK ""
+#define ADDR (*(volatile long *) addr)
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+
+/*
+ * xchg(ptr,v): swap v into *ptr, yield the old value. From Linux; it is gross.
+ */
+#define xchg(ptr,v) \
+        ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr,
+                                   int size)
+{
+    switch (size) { /* operand width in bytes; any other width returns x as is */
+    case 1: /* byte exchange; "q" asks for a byte-addressable register */
+        __asm__ __volatile__("xchgb %b0,%1"
+                             :"=q" (x)
+                             :"m" (*__xg(ptr)), "0" (x)
+                             :"memory");
+        break;
+    case 2: /* 16-bit exchange */
+        __asm__ __volatile__("xchgw %w0,%1"
+                             :"=r" (x)
+                             :"m" (*__xg(ptr)), "0" (x)
+                             :"memory");
+        break;
+    case 4: /* 32-bit exchange */
+        __asm__ __volatile__("xchgl %0,%1"
+                             :"=r" (x)
+                             :"m" (*__xg(ptr)), "0" (x)
+                             :"memory");
+        break;
+    }
+    return x; /* previous contents of *ptr when an exchange was done */
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * Returns 0 if the bit was clear, -1 (sbbl of the carry flag) if it was set.
+ * One btrl (LOCK_PREFIX is empty here); "memory" clobber = compiler barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+        int oldbit;
+
+        __asm__ __volatile__( LOCK_PREFIX
+                "btrl %2,%1\n\tsbbl %0,%0"
+                :"=r" (oldbit),"=m" (ADDR)
+                :"Ir" (nr) : "memory");
+        return oldbit;
+}
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+    return (((const volatile unsigned int *) addr)[nr >> 5] >> (nr & 31)) & 1;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+    int oldbit; /* 0 if the bit is clear, -1 if set (constant_test_bit gives 1) */
+    
+    __asm__ __volatile__(
+        "btl %2,%1\n\tsbbl %0,%0" /* btl: CF = bit; sbbl: oldbit = 0 - CF */
+        :"=r" (oldbit)
+        :"m" (ADDR),"Ir" (nr));
+    return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+
+/**
+ * set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * A single btsl (LOCK_PREFIX is empty in this header). The word is both
+ * output and input: btsl is a read-modify-write, and with "=m" alone the
+ * compiler may treat earlier stores to it as dead. @nr may be almost
+ * arbitrarily large; it is not restricted to a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+        __asm__ __volatile__( LOCK_PREFIX
+                "btsl %1,%0"
+                :"=m" (ADDR)
+                :"Ir" (nr), "m" (ADDR));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * A single btrl (LOCK_PREFIX is empty in this header). No "memory" clobber,
+ * so this is not a compiler barrier; callers such as __cli() add barrier().
+ * The word is both output and input because btrl is a read-modify-write;
+ * "=m" alone would let the compiler treat earlier stores to it as dead.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+        __asm__ __volatile__( LOCK_PREFIX
+                "btrl %1,%0"
+                :"=m" (ADDR)
+                :"Ir" (nr), "m" (ADDR));
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Increments @v by 1 with one incl; LOCK is defined empty in this header,
+ * so no bus lock is taken. Useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+        __asm__ __volatile__(
+                LOCK "incl %0"
+                :"=m" (v->counter)
+                :"m" (v->counter));
+}
+
+
+/* useful hypervisor macros */
+
+struct desc_struct {
+        unsigned long a,b;
+};
+extern struct desc_struct default_ldt[];
+
+#define asmlinkage        __attribute__((regparm(0)))
+
+/*
+ * some random linux macros
+ */
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _OS_H_ */
diff --git a/mini-os/h/time.h b/mini-os/h/time.h
new file mode 100644
index 0000000000..b136f4b0c3
--- /dev/null
+++ b/mini-os/h/time.h
@@ -0,0 +1,59 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: time.h
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Jul 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Time and timer functions
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _TIME_H_
+#define _TIME_H_
+
+#include <list.h>
+
+/*
+ * System Time
+ * 64 bit value containing the nanoseconds elapsed since boot time.
+ * This value is adjusted by frequency drift.
+ * NOW() returns the current time.
+ * The other macros are for convenience to approximate short intervals
+ * of real time into system time 
+ */
+typedef s64 s_time_t;
+#define NOW()                   ((s_time_t)get_s_time())
+#define SECONDS(_s)             (((s_time_t)(_s))  * 1000000000UL )
+#define TENTHS(_ts)             (((s_time_t)(_ts)) * 100000000UL )
+#define HUNDREDTHS(_hs)         (((s_time_t)(_hs)) * 10000000UL )
+#define MILLISECS(_ms)          (((s_time_t)(_ms)) * 1000000UL )
+#define MICROSECS(_us)          (((s_time_t)(_us)) * 1000UL )
+#define Time_Max                ((s_time_t) 0x7fffffffffffffffLL)
+#define FOREVER                 Time_Max
+
+
+/* wall clock time  */
+typedef long time_t;
+typedef long suseconds_t;
+struct timeval {
+	time_t		tv_sec;		/* seconds */
+	suseconds_t	tv_usec;	/* microseconds */
+};
+
+
+/* prototypes */
+void     init_time(void);
+s_time_t get_s_time(void);
+s_time_t get_v_time(void);
+void     gettimeofday(struct timeval *tv);
+
+#endif /* _TIME_H_ */
diff --git a/mini-os/h/types.h b/mini-os/h/types.h
new file mode 100644
index 0000000000..4f87a74e99
--- /dev/null
+++ b/mini-os/h/types.h
@@ -0,0 +1,41 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: types.h
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: May 2003
+ * 
+ * Environment: Xeno Minimal OS
+ * Description: a random collection of type definitions
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+typedef signed char         s8;
+typedef unsigned char       u8;
+typedef signed short        s16;
+typedef unsigned short      u16;
+typedef signed int          s32;
+typedef unsigned int        u32;
+typedef signed long long    s64;
+typedef unsigned long long  u64;
+
+typedef unsigned int        size_t;
+
+/* FreeBSD compat types */
+typedef unsigned char       u_char;
+typedef unsigned int        u_int;
+typedef unsigned long       u_long;
+typedef long long           quad_t;
+typedef unsigned long long  u_quad_t;
+typedef unsigned int        uintptr_t;
+#endif /* _TYPES_H_ */
diff --git a/mini-os/head.S b/mini-os/head.S
new file mode 100644
index 0000000000..3f4e6670c3
--- /dev/null
+++ b/mini-os/head.S
@@ -0,0 +1,46 @@
+#include <os.h>
+
+/* Offsets in start_info structure */
+#define SHARED_INFO  4
+#define MOD_START   12
+#define MOD_LEN     16
+
+#define ENTRY(X) .globl X ; X :
+
+.globl _start                
+_start:
+        cld
+        
+        lss stack_start,%esp            /* SS:ESP from stack_start below */
+        /* NOTE(review): the first (MOD_LEN % 4) bytes are never copied. */
+        /* Copy any module somewhere safe before it's clobbered by BSS. */
+        mov  MOD_LEN(%esi),%ecx
+        shr  $2,%ecx                    /* number of whole 32-bit words */
+        jz   2f        /* bail from copy loop if no whole word to copy */
+        
+        mov  $_end,%edi
+        add  MOD_LEN(%esi),%edi         /* %edi = _end + length (dest end) */
+        mov  MOD_START(%esi),%eax
+        add  MOD_LEN(%esi),%eax         /* %eax = source end */
+1:      sub  $4,%eax                    /* copy downwards, a word per pass */
+        sub  $4,%edi
+        mov  (%eax),%ebx
+        mov  %ebx,(%edi)
+        loop 1b
+        mov  %edi,MOD_START(%esi)       /* record where the copy starts */
+
+        /* Clear BSS first so that there are no surprises... */
+2:      xorl %eax,%eax
+	    movl $__bss_start,%edi
+	    movl $_end,%ecx
+	    subl %edi,%ecx                  /* byte count: _end - __bss_start */
+	    rep stosb
+
+        push %esi                       /* argument for start_kernel */
+        call start_kernel               /* no code follows if it returns */
+
+
+stack_start:                            /* far pointer read by lss above */
+	.long stack+8192, __KERNEL_DS
+
+
diff --git a/mini-os/hypervisor.c b/mini-os/hypervisor.c
new file mode 100644
index 0000000000..cf3349426e
--- /dev/null
+++ b/mini-os/hypervisor.c
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * hypervisor.c
+ * 
+ * Communication to/from hypervisor.
+ *
+ * Copied from XenoLinux and adjusted by Rolf.Neugebauer@intel.com
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <os.h>
+#include <hypervisor.h>
+
+static unsigned long event_mask = 0;
+static unsigned long ev_err_count;
+
+void do_hypervisor_callback(struct pt_regs *regs)
+{
+    unsigned long events, flags;
+    shared_info_t *shared = HYPERVISOR_shared_info;
+    /* Dispatch pending events; repeat while the shared page shows more. */
+    do {
+        /* Specialised local_irq_save(): clear the master enable, keep old. */
+        flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, 
+                                   &shared->events_mask);
+        barrier();
+
+        events  = xchg(&shared->events, 0);
+        events &= event_mask;
+
+        /* Call do_event(bit, regs) for each set bit, lowest bit first. */
+        __asm__ __volatile__ (
+            "   push %1                            ;"
+            "   sub  $4,%%esp                      ;"
+            "   jmp  2f                            ;"
+            "1: btrl %%eax,%0                      ;" /* clear bit     */
+            "   mov  %%eax,(%%esp)                 ;"
+            "   call do_event                      ;" /* do_event(event) */
+            "2: bsfl %0,%%eax                      ;" /* %eax == bit # */
+            "   jnz  1b                            ;"
+            "   add  $8,%%esp                      ;"
+            /* %ebx is callee-saved; "+b" because btrl modifies 'events' */
+            : "+b" (events) : "r" (regs)
+            /* clobbered by callback function calls */
+            : "eax", "ecx", "edx", "memory" ); 
+
+        /* Specialised local_irq_restore(): re-enable only if it was on. */
+        if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask);
+        barrier();
+    }
+    while ( shared->events );
+}
+
+
+
+/*
+ * Per-event enable/disable/ack interface; there is no irq.c in mini-os.
+ */
+
+void enable_hypervisor_event(unsigned int ev)
+{
+    shared_info_t *shared = HYPERVISOR_shared_info;
+    set_bit(ev, &event_mask);            /* local dispatch filter */
+    set_bit(ev, &shared->events_mask);   /* mask in the shared page */
+    if ( test_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask) )
+        do_hypervisor_callback(NULL);    /* delivery is on: dispatch now */
+}
+
+void disable_hypervisor_event(unsigned int ev)
+{
+    clear_bit(ev, &event_mask); /* filter applied in do_hypervisor_callback */
+    clear_bit(ev, &HYPERVISOR_shared_info->events_mask); /* shared-page mask */
+}
+
+void ack_hypervisor_event(unsigned int ev)
+{
+    if ( !(event_mask & (1UL<<ev)) )  /* 1UL: 1<<31 would overflow int */
+    {
+        /* An event that was never enabled here: only count it. */
+        atomic_inc((atomic_t *)&ev_err_count);
+    }
+    set_bit(ev, &HYPERVISOR_shared_info->events_mask); /* set its mask bit */
+}
diff --git a/mini-os/kernel.c b/mini-os/kernel.c
new file mode 100644
index 0000000000..7c37604745
--- /dev/null
+++ b/mini-os/kernel.c
@@ -0,0 +1,115 @@
+/******************************************************************************
+ * kernel.c
+ * 
+ * Assorted crap goes here, including the initial C entry point, jumped at
+ * from head.S.
+ */
+
+#include <os.h>
+#include <hypervisor.h>
+#include <mm.h>
+#include <events.h>
+#include <time.h>
+#include <types.h>
+#include <lib.h>
+
+/*
+ * Shared page for communicating with the hypervisor.
+ * Events flags go here, for example.
+ */
+shared_info_t *HYPERVISOR_shared_info;
+
+/*
+ * This structure contains start-of-day info, such as pagetable base pointer,
+ * address of the shared_info structure, and things like that.
+ */
+union start_info_union start_info_union;
+
+/*
+ * Just allocate the kernel stack here. SS:ESP is set up to point here
+ * in head.S.
+ */
+char stack[8192];
+
+
+/* Assembler interface fns in entry.S. */
+void hypervisor_callback(void);
+void failsafe_callback(void);
+
+/* default exit event handler */
+static void exit_handler(int ev, struct pt_regs *regs);
+
+/*
+ * INITIAL C ENTRY POINT.
+ */
+void start_kernel(start_info_t *si)
+{
+    int vif;
+
+    /* Keep a private copy of the start-of-day info in global storage. */
+    memcpy(&start_info, si, sizeof(*si));
+
+    /* The shared_info pointer is taken from that copy. */
+    HYPERVISOR_shared_info = start_info.shared_info;
+
+    /* Register the event and failsafe entry points with the hypervisor. */
+    HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)hypervisor_callback,
+                             __KERNEL_CS, (unsigned long)failsafe_callback);
+
+    /* Event delivery is disabled at start of day: switch it on. */
+    __sti();
+
+    /* Print the start_info fields (read through 'si', not the copy). */
+    printk("Xeno Minimal OS!\n");
+    printk("start_info:   %p\n",  si);
+    printk("  nr_pages:   %lu",   si->nr_pages);
+    printk("  shared_inf: %p\n",  si->shared_info);
+    printk("  pt_base:    %p",    (void *)si->pt_base); 
+    printk("  mod_start:  0x%lx\n", si->mod_start);
+    printk("  mod_len:    %lu\n", si->mod_len); 
+    printk("  net_rings: ");
+    for (vif = 0; vif < MAX_DOMAIN_VIFS; vif++)
+        printk(" %lx", si->net_rings[vif]);
+    printk("\n");
+    printk("  blk_ring:   0x%lx\n", si->blk_ring);
+    printk("  dom_id:     %d\n",  si->dom_id);
+    printk("  flags:      0x%lx\n", si->flags);
+    printk("  cmd_line:   %s\n",
+           si->cmd_line ? (const char *)si->cmd_line : "NULL");
+
+    /* Memory management comes first. */
+    init_mm();
+
+    /* Then the event layer. */
+    init_events();
+
+    /* Install exit_handler for EV_DIE, then enable that event. */
+    add_ev_action(EV_DIE, &exit_handler);
+    enable_ev_action(EV_DIE);
+    enable_hypervisor_event(EV_DIE);
+
+    /* Time and timers. */
+    init_time();
+
+    /* Nothing left to do: loop on HYPERVISOR_yield() forever. */
+    while (1)
+        HYPERVISOR_yield();
+}
+
+
+/*
+ * do_exit: This is called whenever an IRET fails in entry.S.
+ * This will generally be because an application has got itself into
+ * a really bad state (probably a bad CS or SS). It must be killed.
+ * Of course, minimal OS doesn't have applications :-)
+ */
+
+void do_exit(void)
+{
+    printk("do_exit called!\n");
+    while (1) { /* spin forever: this function never returns */ }
+}
+static void exit_handler(int ev, struct pt_regs *regs) { /* EV_DIE action */
+    do_exit(); /* both arguments are ignored */
+}
+
diff --git a/mini-os/lib/malloc.c b/mini-os/lib/malloc.c
new file mode 100644
index 0000000000..003c086858
--- /dev/null
+++ b/mini-os/lib/malloc.c
@@ -0,0 +1,5700 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: malloc.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Library functions, malloc et al.
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <os.h>
+#include <mm.h>
+#include <types.h>
+#include <lib.h>
+
+/* standard compile option
+ * NOTE(review): dlmalloc tests HAVE_MEMCPY; HAVE_MEMCOPY looks like a typo. */
+#define HAVE_MEMCOPY                1
+#define USE_MEMCPY                  1
+#undef  HAVE_MMAP
+#undef  MMAP_CLEARS
+#undef  HAVE_MREMAP
+#define malloc_getpagesize          PAGE_SIZE
+#undef  HAVE_USR_INCLUDE_MALLOC_H   
+#define LACKS_UNISTD_H              1
+#define LACKS_SYS_PARAM_H           1
+#define LACKS_SYS_MMAN_H            1
+#define LACKS_FCNTL_H               1
+
+/* page allocator interface: dlmalloc obtains memory through more_core() */
+#define MORECORE             more_core
+#define MORECORE_CONTIGUOUS  0  /* successive regions need not be adjacent */
+#define MORECORE_FAILURE     0  /* value MORECORE returns on failure */
+#define MORECORE_CANNOT_TRIM 1  /* MORECORE cannot handle negative arguments */
+
+/*
+ * more_core: MORECORE hook that feeds dlmalloc from the page allocator.
+ * n == 0 returns the end address recorded by the last successful call
+ * (null before any). Otherwise get_order(n) picks a page order and the
+ * region from get_free_pages() is taken to span (1 << order) pages; the
+ * result is returned unchanged, so null doubles as MORECORE_FAILURE (0).
+ */
+static void *more_core(size_t n)
+{
+    static char *last;   /* end of the newest region, reported for n == 0 */
+    unsigned long order;
+    char *ret;
+
+    if (n == 0)
+        return last;
+
+    order = get_order(n);
+    ret = (char *)get_free_pages(order);
+    /* 1UL keeps the shift unsigned; char * avoids arithmetic on void * */
+    if (ret)
+        last = ret + ((1UL << order) * PAGE_SIZE);
+    return ret;
+}
+
+/* other options; the stock defaults further down sit under "#if 0" */
+#define __STD_C     1
+#define Void_t      void
+#define assert(x) ((void)0)  /* assertions compile to nothing */
+
+#define CHUNK_SIZE_T unsigned long
+#define PTR_UINT unsigned long
+#define INTERNAL_SIZE_T size_t
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+#define MALLOC_ALIGNMENT       (2 * SIZE_SZ)
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
+#define TRIM_FASTBINS  0
+
+#define M_MXFAST            1    
+#define DEFAULT_MXFAST     64
+#define M_TRIM_THRESHOLD       -1
+#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
+#define M_TOP_PAD              -2
+#define DEFAULT_TOP_PAD        (0)
+#define M_MMAP_THRESHOLD      -3
+#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
+#define M_MMAP_MAX             -4
+#define DEFAULT_MMAP_MAX       (0)  /* documented stock default is 65536 */
+#define MALLOC_FAILURE_ACTION   printf("malloc failure\n")  /* stock default: errno = ENOMEM */
+
+/* name translation, phase 1: internal mangled names -> public_* (no wrappers) */
+#define cALLOc      public_cALLOc
+#define fREe        public_fREe
+#define cFREe       public_cFREe
+#define mALLOc      public_mALLOc
+#define mEMALIGn    public_mEMALIGn
+#define rEALLOc     public_rEALLOc
+#define vALLOc      public_vALLOc
+#define pVALLOc     public_pVALLOc
+#define mALLINFo    public_mALLINFo
+#define mALLOPt     public_mALLOPt
+#define mTRIm       public_mTRIm
+#define mSTATs      public_mSTATs
+#define mUSABLe     public_mUSABLe
+#define iCALLOc     public_iCALLOc
+#define iCOMALLOc   public_iCOMALLOc
+
+/* name translation, phase 2: public_* -> plain C library names (no dl prefix) */
+#define public_cALLOc    calloc
+#define public_fREe      free
+#define public_cFREe     cfree
+#define public_mALLOc    malloc
+#define public_mEMALIGn  memalign
+#define public_rEALLOc   realloc
+#define public_vALLOc    valloc
+#define public_pVALLOc   pvalloc
+#define public_mALLINFo  mallinfo
+#define public_mALLOPt   mallopt
+#define public_mTRIm     malloc_trim
+#define public_mSTATs    malloc_stats
+#define public_mUSABLe   malloc_usable_size
+#define public_iCALLOc   independent_calloc
+#define public_iCOMALLOc independent_comalloc
+/*
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
+  Doug Lea and released to the public domain.  Use, modify, and
+  redistribute this code without permission or acknowledgement in any
+  way you wish.  Send questions, comments, complaints, performance
+  data, etc to dl@cs.oswego.edu
+
+* VERSION 2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+
+   Note: There may be an updated version of this malloc obtainable at
+           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+         Check before installing!
+
+* Quickstart
+
+  This library is all in one file to simplify the most common usage:
+  ftp it, compile it (-O), and link it into another program. All
+  of the compile-time options default to reasonable values for use on
+  most unix platforms. Compile -DWIN32 for reasonable defaults on windows.
+  You might later want to step through various compile-time and dynamic
+  tuning options.
+
+  For convenience, an include file for code using this malloc is at:
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h
+  You don't really need this .h file unless you call functions not
+  defined in your system include files.  The .h file contains only the
+  excerpts from this file needed for using this malloc on ANSI C/C++
+  systems, so long as you haven't changed compile-time options about
+  naming and tuning parameters.  If you do, then you can create your
+  own malloc.h that does include all settings by cutting at the point
+  indicated below.
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 256KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(Void_t* p);
+    realloc(Void_t* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+    pvalloc(size_t n);
+    cfree(Void_t* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(Void_t* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes 
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages 
+                           8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may
+       also fail because a system is out of memory.)
+
+  Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined
+
+       When USE_MALLOC_LOCK is defined, wrappers are created to
+       surround every public call with either a pthread mutex or
+       a win32 spinlock (depending on WIN32). This is not
+       especially fast, and can be a major bottleneck.
+       It is designed only to provide minimal protection
+       in concurrent environments, and to provide a basis for
+       extensions.  If you are using malloc in a concurrent program,
+       you would be far better off obtaining ptmalloc, which is
+       derived from a version of this malloc, and is well-tuned for
+       concurrent programs. (See http://www.malloc.de) Note that
+       even when USE_MALLOC_LOCK is defined, you can guarantee
+       full thread-safety only if no threads acquire memory through 
+       direct calls to MORECORE or other system-level allocators.
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably 
+       others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and
+    Linux. It is also reported to work on WIN32 platforms.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    __STD_C                    derived from C compiler defines
+    WIN32                      NOT defined
+    HAVE_MEMCPY                defined
+    USE_MEMCPY                 1 if HAVE_MEMCPY is defined
+    HAVE_MMAP                  defined as 1 
+    MMAP_CLEARS                1
+    HAVE_MREMAP                0 unless linux defined
+    malloc_getpagesize         derived from system #includes, or 4096 if not
+    HAVE_USR_INCLUDE_MALLOC_H  NOT defined
+    LACKS_UNISTD_H             NOT defined unless WIN32
+    LACKS_SYS_PARAM_H          NOT defined unless WIN32
+    LACKS_SYS_MMAN_H           NOT defined unless WIN32
+    LACKS_FCNTL_H              NOT defined
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
+    PTR_UINT                   unsigned long
+    CHUNK_SIZE_T               unsigned long
+
+    Configuration and functionality options:
+
+    USE_DL_PREFIX              NOT defined
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    DEBUG                      NOT defined
+    REALLOC_ZERO_BYTES_FREES   NOT defined
+    MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
+    TRIM_FASTBINS              0
+    FIRST_SORTED_BIN_SIZE      512
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_CONTIGUOUS        1 
+    MORECORE_CANNOT_TRIM       NOT defined
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024) 
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64
+    DEFAULT_TRIM_THRESHOLD     256 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     256 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.
+*/
+
+/* RN: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
+#if 0
+
+/*
+  WIN32 sets up defaults for MS environment and compilers.
+  Otherwise defaults are for unix.
+*/
+
+/* #define WIN32 */
+
+#ifdef WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* Win32 doesn't supply or need the following headers */
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+
+/* Use the supplied emulation of sbrk */
+#define MORECORE sbrk
+#define MORECORE_CONTIGUOUS 1
+#define MORECORE_FAILURE    ((void*)(-1))
+
+/* Use the supplied emulation of mmap and munmap */
+#define HAVE_MMAP 1
+#define MUNMAP_FAILURE  (-1)
+#define MMAP_CLEARS 1
+
+/* These values don't really matter in windows mmap emulation */
+#define MAP_PRIVATE 1
+#define MAP_ANONYMOUS 2
+#define PROT_READ 1
+#define PROT_WRITE 2
+
+/* Emulation functions defined at the end of this file */
+
+/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */
+#ifdef USE_MALLOC_LOCK
+static int slwait(int *sl);
+static int slrelease(int *sl);
+#endif
+
+static long getpagesize(void);
+static long getregionsize(void);
+static void *sbrk(long size);
+static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg);
+static long munmap(void *ptr, long size);
+
+static void vminfo (unsigned long*free, unsigned long*reserved, unsigned long*committed);
+static int cpuinfo (int whole, unsigned long*kernel, unsigned long*user);
+
+#endif
+
+/*
+  __STD_C should be nonzero if using ANSI-standard C compiler, a C++
+  compiler, or a C compiler sufficiently close to ANSI to get away
+  with it.
+*/
+
+#ifndef __STD_C
+#if defined(__STDC__) || defined(_cplusplus)
+#define __STD_C     1
+#else
+#define __STD_C     0
+#endif 
+#endif /*__STD_C*/
+
+
+/*
+  Void_t* is the pointer type that malloc should say it returns
+*/
+
+#ifndef Void_t
+#if (__STD_C || defined(WIN32))
+#define Void_t      void
+#else
+#define Void_t      char
+#endif
+#endif /*Void_t*/
+
+#if __STD_C
+#include <stddef.h>   /* for size_t */
+#else
+#include <sys/types.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
+
+/* #define  LACKS_UNISTD_H */
+
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
+
+/* #define  LACKS_SYS_PARAM_H */
+
+
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>    /* needed for optional MALLOC_FAILURE_ACTION */
+
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DDEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The
+  checking is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with DEBUG set will
+  attempt to check every non-mmapped allocated and free chunk in the
+  course of computing the summaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting DEBUG does NOT provide an automated mechanism for checking
+  that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#if DEBUG
+#include <assert.h>
+#else
+#define assert(x) ((void)0)
+#endif
+
+/*
+  The unsigned integer type used for comparing any two chunk sizes.
+  This should be at least as wide as size_t, but should not be signed.
+*/
+
+#ifndef CHUNK_SIZE_T
+#define CHUNK_SIZE_T unsigned long
+#endif
+
+/* 
+  The unsigned integer type used to hold addresses when they are
+  manipulated as integers. Except that it is not defined on all
+  systems, intptr_t would suffice.
+*/
+#ifndef PTR_UINT
+#define PTR_UINT unsigned long
+#endif
+
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes.
+
+  The default version is the same as size_t.
+
+  While not strictly necessary, it is best to define this as an
+  unsigned type, even if size_t is a signed type. This may avoid some
+  artificial size limitations on some systems.
+
+  On a 64-bit machine, you may be able to reduce malloc overhead by
+  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+  expense of not being able to handle more than 2^32 of malloced
+  space. If this limitation is acceptable, you are encouraged to set
+  this unless you are on a platform requiring 16byte alignments. In
+  this case the alignment requirements turn out to negate any
+  potential advantages of decreasing size_t word size.
+
+  Implementors: Beware of the possible combinations of:
+     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+       and might be the same width as int or as long
+     - size_t might have different width and signedness as INTERNAL_SIZE_T
+     - int and long might be 32 or 64 bits, and might be the same width
+  To deal with this, most comparisons and difference computations
+  among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
+  aware of the fact that casting an unsigned int to a wider long does
+  not sign-extend. (This also makes checking for negative numbers
+  awkward.) Some of these casts result in harmless compiler warnings
+  on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+
+
+
+/*
+  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+  It must be a power of two at least 2 * SIZE_SZ, even on machines
+  for which smaller alignments would suffice. It may be defined as
+  larger than this though. Note however that code and data structures
+  are optimized for the case of 8-byte alignment.
+*/
+
+
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT       (2 * SIZE_SZ)
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
+
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  Some people think it should. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+/*   #define REALLOC_ZERO_BYTES_FREES */
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/*
+  USE_DL_PREFIX will prefix all public routines with the string 'dl'.
+  This is necessary when you only want to use this malloc in one part 
+  of a program, using your regular system malloc elsewhere.
+*/
+
+/* #define USE_DL_PREFIX */
+
+
+/*
+  USE_MALLOC_LOCK causes wrapper functions to surround each
+  callable routine with pthread mutex lock/unlock.
+
+  USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined
+*/
+
+
+/* #define USE_MALLOC_LOCK */
+
+
+/*
+  If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is
+  actually a wrapper function that first calls MALLOC_PREACTION, then
+  calls the internal routine, and follows it with
+  MALLOC_POSTACTION. This is needed for locking, but you can also use
+  this, without USE_MALLOC_LOCK, for purposes of interception,
+  instrumentation, etc. It is a sad fact that using wrappers often
+  noticeably degrades performance of malloc-intensive programs.
+*/
+
+#ifdef USE_MALLOC_LOCK
+#define USE_PUBLIC_MALLOC_WRAPPERS
+#else
+/* #define USE_PUBLIC_MALLOC_WRAPPERS */
+#endif
+
+
+/* 
+   Two-phase name translation.
+   All of the actual routines are given mangled names.
+   When wrappers are used, they become the public callable versions.
+   When DL_PREFIX is used, the callable names are prefixed.
+*/
+
+#ifndef USE_PUBLIC_MALLOC_WRAPPERS
+#define cALLOc      public_cALLOc
+#define fREe        public_fREe
+#define cFREe       public_cFREe
+#define mALLOc      public_mALLOc
+#define mEMALIGn    public_mEMALIGn
+#define rEALLOc     public_rEALLOc
+#define vALLOc      public_vALLOc
+#define pVALLOc     public_pVALLOc
+#define mALLINFo    public_mALLINFo
+#define mALLOPt     public_mALLOPt
+#define mTRIm       public_mTRIm
+#define mSTATs      public_mSTATs
+#define mUSABLe     public_mUSABLe
+#define iCALLOc     public_iCALLOc
+#define iCOMALLOc   public_iCOMALLOc
+#endif
+
+#ifdef USE_DL_PREFIX
+#define public_cALLOc    dlcalloc
+#define public_fREe      dlfree
+#define public_cFREe     dlcfree
+#define public_mALLOc    dlmalloc
+#define public_mEMALIGn  dlmemalign
+#define public_rEALLOc   dlrealloc
+#define public_vALLOc    dlvalloc
+#define public_pVALLOc   dlpvalloc
+#define public_mALLINFo  dlmallinfo
+#define public_mALLOPt   dlmallopt
+#define public_mTRIm     dlmalloc_trim
+#define public_mSTATs    dlmalloc_stats
+#define public_mUSABLe   dlmalloc_usable_size
+#define public_iCALLOc   dlindependent_calloc
+#define public_iCOMALLOc dlindependent_comalloc
+#else /* USE_DL_PREFIX */
+#define public_cALLOc    calloc
+#define public_fREe      free
+#define public_cFREe     cfree
+#define public_mALLOc    malloc
+#define public_mEMALIGn  memalign
+#define public_rEALLOc   realloc
+#define public_vALLOc    valloc
+#define public_pVALLOc   pvalloc
+#define public_mALLINFo  mallinfo
+#define public_mALLOPt   mallopt
+#define public_mTRIm     malloc_trim
+#define public_mSTATs    malloc_stats
+#define public_mUSABLe   malloc_usable_size
+#define public_iCALLOc   independent_calloc
+#define public_iCOMALLOc independent_comalloc
+#endif /* USE_DL_PREFIX */
+
+
+/*
+  HAVE_MEMCPY should be defined if you are not otherwise using
+  ANSI STD C, but still have memcpy and memset in your C library
+  and want to use them in calloc and realloc. Otherwise simple
+  macro versions are defined below.
+
+  USE_MEMCPY should be defined as 1 if you actually want to
+  have memset and memcpy called. People report that the macro
+  versions are faster than libc versions on some systems.
+  
+  Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks
+  (of <= 36 bytes) are manually unrolled in realloc and calloc.
+*/
+
+#define HAVE_MEMCPY
+
+#ifndef USE_MEMCPY
+#ifdef HAVE_MEMCPY
+#define USE_MEMCPY 1
+#else
+#define USE_MEMCPY 0
+#endif
+#endif
+
+
+#if (__STD_C || defined(HAVE_MEMCPY))
+
+#ifdef WIN32
+/* On Win32 memset and memcpy are already declared in windows.h */
+#else
+#if __STD_C
+void* memset(void*, int, size_t);
+void* memcpy(void*, const void*, size_t);
+#else
+Void_t* memset();
+Void_t* memcpy();
+#endif
+#endif
+#endif
+
+/*
+  MALLOC_FAILURE_ACTION is the action to take before "return 0" when
+  malloc fails to be able to return memory, either because memory is
+  exhausted or because of illegal arguments.
+  
+  By default, sets errno if running on STD_C platform, else does nothing.  
+*/
+
+#ifndef MALLOC_FAILURE_ACTION
+#if __STD_C
+#define MALLOC_FAILURE_ACTION \
+   errno = ENOMEM;
+
+#else
+#define MALLOC_FAILURE_ACTION
+#endif
+#endif
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+#ifdef LACKS_UNISTD_H
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+#if __STD_C
+extern Void_t*     sbrk(ptrdiff_t);
+#else
+extern Void_t*     sbrk();
+#endif
+#endif
+#endif
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies. 
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+
+/*
+  Define HAVE_MMAP as true to optionally make malloc() use mmap() to
+  allocate very large blocks.  These will be returned to the
+  operating system immediately after a free(). Also, if mmap
+  is available, it is used as a backup strategy in cases where
+  MORECORE fails to provide space from system.
+
+  This malloc is best tuned to work with mmap for large requests.
+  If you do not have mmap, operations involving very large chunks (1MB
+  or so) may be slower than you'd like.
+*/
+
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif
+
+#if HAVE_MMAP
+/* 
+   Standard unix mmap using /dev/zero clears memory so calloc doesn't
+   need to.
+*/
+
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif
+
+#else /* no mmap */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 0
+#endif
+#endif
+
+
+/* 
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup (which is done only if
+   HAVE_MMAP).  The value must be a multiple of page size.  This
+   backup strategy generally applies only when systems have "holes" in
+   address space, so sbrk cannot perform contiguous expansion, but
+   there is still space available on system.  On systems for which
+   this is known to be useful (i.e. most linux kernels), this occurs
+   only when programs allocate huge amounts of memory.  Between this,
+   and the fact that mmap regions tend to be limited, the size should
+   be large, to avoid too many mmap calls and thus avoid running out
+   of kernel resources.
+*/
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.  This is currently only possible on Linux with
+  kernel versions newer than 1.3.77.
+*/
+
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#else
+#define HAVE_MREMAP 0
+#endif
+
+#endif /* HAVE_MMAP */
+
+
+/*
+  The system page size. To the extent possible, this malloc manages
+  memory from the system in page-size units.  Note that this value is
+  cached during initialization into a field of malloc_state. So even
+  if malloc_getpagesize is a function, it is only called once.
+
+  The following mechanics for getpagesize were adapted from bsd/gnu
+  getpagesize.h. If none of the system-probes here apply, a value of
+  4096 is used, which should be OK: If they don't apply, then using
+  the actual value probably doesn't impact performance.
+*/
+
+
+#ifndef malloc_getpagesize
+
+#ifndef LACKS_UNISTD_H
+#  include <unistd.h>
+#endif
+
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
+#    ifndef _SC_PAGE_SIZE
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
+#    endif
+#  endif
+
+#  ifdef _SC_PAGE_SIZE
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#  else
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+       extern size_t getpagesize();
+#      define malloc_getpagesize getpagesize()
+#    else
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
+#        define malloc_getpagesize getpagesize() 
+#      else
+#        ifndef LACKS_SYS_PARAM_H
+#          include <sys/param.h>
+#        endif
+#        ifdef EXEC_PAGESIZE
+#          define malloc_getpagesize EXEC_PAGESIZE
+#        else
+#          ifdef NBPG
+#            ifndef CLSIZE
+#              define malloc_getpagesize NBPG
+#            else
+#              define malloc_getpagesize (NBPG * CLSIZE)
+#            endif
+#          else
+#            ifdef NBPC
+#              define malloc_getpagesize NBPC
+#            else
+#              ifdef PAGESIZE
+#                define malloc_getpagesize PAGESIZE
+#              else /* just guess */
+#                define malloc_getpagesize (4096) 
+#              endif
+#            endif
+#          endif
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+  /usr/include/malloc.h file that includes a declaration of struct
+  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant
+  version is declared below.  These must be precisely the same for
+  mallinfo() to work.  The original SVID version of this struct,
+  defined on most systems with mallinfo, declares all fields as
+  ints. But some others define as unsigned long. If your system
+  defines the fields using a type of different width than listed here,
+  you must #include your system version and #define
+  HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H /* take struct mallinfo from the system header */
+#include "/usr/include/malloc.h"
+#else /* !HAVE_USR_INCLUDE_MALLOC_H: declare the struct locally */
+
+/* SVID2/XPG mallinfo structure */
+
+struct mallinfo {
+  int arena;    /* non-mmapped space allocated from system */
+  int ordblks;  /* number of free chunks */
+  int smblks;   /* number of fastbin blocks */
+  int hblks;    /* number of mmapped regions */
+  int hblkhd;   /* space in mmapped regions */
+  int usmblks;  /* maximum total allocated space */
+  int fsmblks;  /* space available in freed fastbin blocks */
+  int uordblks; /* total allocated space */
+  int fordblks; /* total free space */
+  int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+/*
+  SVID/XPG defines four standard parameter numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports other
+  options in mallopt described below.
+*/
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+#if __STD_C /* ANSI C: full prototype */
+Void_t*  public_mALLOc(size_t);
+#else /* !__STD_C: declared without a parameter list */
+Void_t*  public_mALLOc();
+#endif /* __STD_C */
+
+/*
+  free(Void_t* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will,
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+#if __STD_C /* ANSI C: full prototype */
+void     public_fREe(Void_t*);
+#else /* !__STD_C: declared without a parameter list */
+void     public_fREe();
+#endif /* __STD_C */
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+#if __STD_C /* ANSI C: full prototype (n_elements, element_size) */
+Void_t*  public_cALLOc(size_t, size_t);
+#else /* !__STD_C: declared without a parameter list */
+Void_t*  public_cALLOc();
+#endif /* __STD_C */
+
+/*
+  realloc(Void_t* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available. 
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.  
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+#if __STD_C /* ANSI C: full prototype (p, n) */
+Void_t*  public_rEALLOc(Void_t*, size_t);
+#else /* !__STD_C: declared without a parameter list */
+Void_t*  public_rEALLOc();
+#endif /* __STD_C */
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+#if __STD_C /* ANSI C: full prototype (alignment, n) */
+Void_t*  public_mEMALIGn(size_t, size_t);
+#else /* !__STD_C: declared without a parameter list */
+Void_t*  public_mEMALIGn();
+#endif /* __STD_C */
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+#if __STD_C /* ANSI C: full prototype */
+Void_t*  public_vALLOc(size_t);
+#else /* !__STD_C: declared without a parameter list */
+Void_t*  public_vALLOc();
+#endif /* __STD_C */
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         256*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any  
+  M_MMAP_THRESHOLD -3         256*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+#if __STD_C /* ANSI C: full prototype (parameter_number, parameter_value) */
+int      public_mALLOPt(int, int);
+#else /* !__STD_C: declared without a parameter list */
+int      public_mALLOPt();
+#endif /* __STD_C */
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system 
+  ordblks:   the number of free chunks 
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+               have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions 
+  hblkhd:    total bytes held in mmapped regions 
+  usmblks:   the maximum total allocated space. This will be greater
+                than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks 
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space 
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and 
+  thus be inaccurate.
+*/
+#if __STD_C /* ANSI C: prototype taking no arguments; result is returned by copy */
+struct mallinfo public_mALLINFo(void);
+#else /* !__STD_C: declared without a parameter list */
+struct mallinfo public_mALLINFo();
+#endif /* __STD_C */
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements that can hold contents of size elem_size, each
+  of which starts out cleared, and can be independently freed,
+  realloc'ed etc. The elements are guaranteed to be adjacently
+  allocated (this is not guaranteed to occur with multiple callocs or
+  mallocs), which may also improve cache locality in some
+  applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use regular calloc and assign pointers into this
+  space to represent elements.  (In this case though, you cannot
+  independently free elements.)
+  
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools.  It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+  struct Node { int item; struct Node* next; };
+  
+  struct Node* build_list() {
+    struct Node** pool;
+    int n = read_number_of_nodes_needed();
+    if (n <= 0) return 0;
+    pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
+    if (pool == 0) die(); 
+    // organize into a linked list... 
+    struct Node* first = pool[0];
+    for (int i = 0; i < n-1; ++i) 
+      pool[i]->next = pool[i+1];
+    free(pool);     // Can now free the array (or not, if it is needed later)
+    return first;
+  }
+*/
+#if __STD_C /* ANSI C: full prototype (n_elements, element_size, chunks) */
+Void_t** public_iCALLOc(size_t, size_t, Void_t**);
+#else /* !__STD_C: declared without a parameter list */
+Void_t** public_iCALLOc();
+#endif /* __STD_C */
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array.    It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+  
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use a single regular malloc, and assign pointers at
+  particular offsets in the aggregate space. (In this case though, you 
+  cannot independently free elements.)
+
+  independent_comalloc differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time.  For example:
+
+  struct Head { ... };
+  struct Foot { ... };
+
+  void send_message(char* msg) {
+    int msglen = strlen(msg);
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+    void* chunks[3];
+    if (independent_comalloc(3, sizes, chunks) == 0)
+      die();
+    struct Head* head = (struct Head*)(chunks[0]);
+    char*        body = (char*)(chunks[1]);
+    struct Foot* foot = (struct Foot*)(chunks[2]);
+    // ...
+  }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+#if __STD_C /* ANSI C: full prototype (n_elements, sizes, chunks) */
+Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**);
+#else /* !__STD_C: declared without a parameter list */
+Void_t** public_iCOMALLOc();
+#endif /* __STD_C */
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+#if __STD_C /* ANSI C: full prototype */
+Void_t*  public_pVALLOc(size_t);
+#else /* !__STD_C: declared without a parameter list */
+Void_t*  public_pVALLOc();
+#endif /* __STD_C */
+
+/*
+  cfree(Void_t* p);
+  Equivalent to free(p).
+
+  cfree is needed/defined on some systems that pair it with calloc,
+  for odd historical reasons (such as: cfree is used in example 
+  code in the first edition of K&R).
+*/
+#if __STD_C /* ANSI C: full prototype */
+void     public_cFREe(Void_t*);
+#else /* !__STD_C: declared without a parameter list */
+void     public_cFREe();
+#endif /* __STD_C */
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+  
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+  
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+#if __STD_C /* ANSI C: full prototype (pad) */
+int      public_mTRIm(size_t);
+#else /* !__STD_C: declared without a parameter list */
+int      public_mTRIm();
+#endif /* __STD_C */
+
+/*
+  malloc_usable_size(Void_t* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+#if __STD_C /* ANSI C: full prototype */
+size_t   public_mUSABLe(Void_t*);
+#else /* !__STD_C: declared without a parameter list */
+size_t   public_mUSABLe();
+#endif /* __STD_C */
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+#if __STD_C /* ANSI C: "(void)" makes this a real prototype taking no arguments */
+void     public_mSTATs(void);
+#else /* !__STD_C: declared without a parameter list */
+void     public_mSTATs();
+#endif /* __STD_C */
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 16 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST /* may already come from a system malloc.h */
+#define M_MXFAST            1    /* mallopt parameter number */
+#endif /* M_MXFAST */
+
+#ifndef DEFAULT_MXFAST /* overridable at build time */
+#define DEFAULT_MXFAST     64 /* default fastbin limit, in request-size units */
+#endif /* DEFAULT_MXFAST */
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set to 
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1 /* mallopt parameter number, not a size */
+
+#ifndef DEFAULT_TRIM_THRESHOLD /* overridable at build time */
+#define DEFAULT_TRIM_THRESHOLD (256 * 1024) /* bytes of unused top memory kept before trimming */
+#endif /* DEFAULT_TRIM_THRESHOLD */
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2 /* mallopt parameter number, not a size */
+
+#ifndef DEFAULT_TOP_PAD /* overridable at build time */
+#define DEFAULT_TOP_PAD        (0) /* no extra padding beyond page rounding */
+#endif /* DEFAULT_TOP_PAD */
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back 
+      to the system, which helps keep the system level memory 
+      demands of a long-lived program low. 
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+*/
+
+#define M_MMAP_THRESHOLD      -3 /* mallopt parameter number, not a size */
+
+#ifndef DEFAULT_MMAP_THRESHOLD /* overridable at build time */
+#define DEFAULT_MMAP_THRESHOLD (256 * 1024) /* request size, in bytes, from which mmap is considered */
+#endif /* DEFAULT_MMAP_THRESHOLD */
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.  If
+  HAVE_MMAP is not set, the default value is 0, and attempts to set it
+  to non-zero values in mallopt will fail.
+*/
+
+#define M_MMAP_MAX             -4 /* mallopt parameter number, not a count */
+
+#ifndef DEFAULT_MMAP_MAX /* overridable at build time */
+#if HAVE_MMAP
+#define DEFAULT_MMAP_MAX       (65536) /* safeguard limit on simultaneous mmapped requests */
+#else /* !HAVE_MMAP */
+#define DEFAULT_MMAP_MAX       (0) /* 0 disables use of mmap for large requests */
+#endif /* HAVE_MMAP */
+#endif /* DEFAULT_MMAP_MAX */
+
+#ifdef __cplusplus
+};  /* end of extern "C" */
+#endif
+
+
+/* RN XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
+#endif 
+
+/* 
+  ========================================================================
+  To make a fully customizable malloc.h header file, cut everything
+  above this line, put into file malloc.h, edit to suit, and #include it 
+  on the next line, as well as in programs that use this malloc.
+  ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/* --------------------- public wrappers ---------------------- */
+
+#ifdef USE_PUBLIC_MALLOC_WRAPPERS
+
+/* Declare all routines as internal */
+#if __STD_C /* file-local forward declarations of the routines the public_* wrappers call */
+static Void_t*  mALLOc(size_t);
+static void     fREe(Void_t*);
+static Void_t*  rEALLOc(Void_t*, size_t);
+static Void_t*  mEMALIGn(size_t, size_t);
+static Void_t*  vALLOc(size_t);
+static Void_t*  pVALLOc(size_t);
+static Void_t*  cALLOc(size_t, size_t);
+static Void_t** iCALLOc(size_t, size_t, Void_t**);
+static Void_t** iCOMALLOc(size_t, size_t*, Void_t**);
+static void     cFREe(Void_t*);
+static int      mTRIm(size_t);
+static size_t   mUSABLe(Void_t*);
+static void     mSTATs(void);  /* "(void)": a real prototype, like mALLINFo below */
+static int      mALLOPt(int, int);
+static struct mallinfo mALLINFo(void);
+#else /* !__STD_C: the same routines, declared without parameter lists */
+static Void_t*  mALLOc();
+static void     fREe();
+static Void_t*  rEALLOc();
+static Void_t*  mEMALIGn();
+static Void_t*  vALLOc();
+static Void_t*  pVALLOc();
+static Void_t*  cALLOc();
+static Void_t** iCALLOc();
+static Void_t** iCOMALLOc();
+static void     cFREe();
+static int      mTRIm();
+static size_t   mUSABLe();
+static void     mSTATs();
+static int      mALLOPt();
+static struct mallinfo mALLINFo();
+#endif /* __STD_C */
+
+/*
+  MALLOC_PREACTION and MALLOC_POSTACTION should be
+  defined to return 0 on success, and nonzero on failure.
+  The return value of MALLOC_POSTACTION is currently ignored
+  in wrapper functions since there is no reasonable default
+  action to take on failure.
+*/
+
+
+#ifdef USE_MALLOC_LOCK /* serialize every public_* wrapper on one global lock */
+
+#ifdef WIN32
+
+static int mALLOC_MUTEx; /* lock word handed to slwait/slrelease (not defined in this section) */
+#define MALLOC_PREACTION   slwait(&mALLOC_MUTEx)
+#define MALLOC_POSTACTION  slrelease(&mALLOC_MUTEx)
+
+#else /* !WIN32: use a pthread mutex */
+
+#include <pthread.h>
+
+static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER;
+
+#define MALLOC_PREACTION   pthread_mutex_lock(&mALLOC_MUTEx)
+#define MALLOC_POSTACTION  pthread_mutex_unlock(&mALLOC_MUTEx)
+
+#endif /* WIN32 */
+
+#else /* !USE_MALLOC_LOCK */
+
+/* Substitute anything you like for these; (0) always reports success */
+
+#define MALLOC_PREACTION   (0)
+#define MALLOC_POSTACTION  (0)
+
+#endif /* USE_MALLOC_LOCK */
+
+/* Guarded malloc: returns null without allocating if PREACTION fails. */
+Void_t* public_mALLOc(size_t bytes) {
+  Void_t* mem = 0;
+  if (MALLOC_PREACTION == 0) {
+    mem = mALLOc(bytes);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return mem;
+}
+
+/* Guarded free: does nothing at all if PREACTION fails. */
+void public_fREe(Void_t* m) {
+  if (MALLOC_PREACTION == 0) {
+    fREe(m);
+    /* POSTACTION status is intentionally discarded. */
+    (void)(MALLOC_POSTACTION);
+  }
+}
+
+/* Guarded realloc: returns null, without touching m, if PREACTION fails. */
+Void_t* public_rEALLOc(Void_t* m, size_t bytes) {
+  Void_t* resized = 0;
+  if (MALLOC_PREACTION == 0) {
+    resized = rEALLOc(m, bytes);
+    (void)(MALLOC_POSTACTION);  /* status ignored */
+  }
+  return resized;
+}
+
+/* Guarded memalign: returns null without allocating if PREACTION fails. */
+Void_t* public_mEMALIGn(size_t alignment, size_t bytes) {
+  Void_t* mem = 0;
+  if (MALLOC_PREACTION == 0) {
+    mem = mEMALIGn(alignment, bytes);
+    /* Run POSTACTION; nothing is done with its result. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return mem;
+}
+
+/* Guarded valloc: returns null without allocating if PREACTION fails. */
+Void_t* public_vALLOc(size_t bytes) {
+  Void_t* mem = 0;
+  if (MALLOC_PREACTION == 0) {
+    mem = vALLOc(bytes);
+    /* POSTACTION runs here; its status is not examined. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return mem;
+}
+
+/* Guarded pvalloc: returns null without allocating if PREACTION fails. */
+Void_t* public_pVALLOc(size_t bytes) {
+  Void_t* mem = 0;
+  if (MALLOC_PREACTION == 0) {
+    mem = pVALLOc(bytes);
+    /* POSTACTION runs here; its status is not examined. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return mem;
+}
+
+/* Guarded calloc: returns null without allocating if PREACTION fails. */
+Void_t* public_cALLOc(size_t n, size_t elem_size) {
+  Void_t* mem = 0;
+  if (MALLOC_PREACTION == 0) {
+    mem = cALLOc(n, elem_size);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return mem;
+}
+
+
+/* Guarded independent_calloc: returns null if PREACTION fails. */
+Void_t** public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks) {
+  Void_t** ptrs = 0;
+  if (MALLOC_PREACTION == 0) {
+    ptrs = iCALLOc(n, elem_size, chunks);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return ptrs;
+}
+
+/* Guarded independent_comalloc: returns null if PREACTION fails. */
+Void_t** public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks) {
+  Void_t** ptrs = 0;
+  if (MALLOC_PREACTION == 0) {
+    ptrs = iCOMALLOc(n, sizes, chunks);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return ptrs;
+}
+
+/* Guarded cfree: does nothing at all if PREACTION fails. */
+void public_cFREe(Void_t* m) {
+  if (MALLOC_PREACTION == 0) {
+    cFREe(m);
+    /* POSTACTION status is intentionally discarded. */
+    (void)(MALLOC_POSTACTION);
+  }
+}
+
+/* Guarded malloc_trim: reports 0 without trimming if PREACTION fails. */
+int public_mTRIm(size_t s) {
+  int released = 0;
+  if (MALLOC_PREACTION == 0) {
+    released = mTRIm(s);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return released;
+}
+
+/* Guarded malloc_usable_size: reports 0 if PREACTION fails. */
+size_t public_mUSABLe(Void_t* m) {
+  size_t usable = 0;
+  if (MALLOC_PREACTION == 0) {
+    usable = mUSABLe(m);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return usable;
+}
+
+/* Guarded malloc_stats: does nothing at all if PREACTION fails. */
+void public_mSTATs() {
+  if (MALLOC_PREACTION == 0) {
+    mSTATs();
+    /* POSTACTION status is intentionally discarded. */
+    (void)(MALLOC_POSTACTION);
+  }
+}
+
+/*
+  Guarded mallinfo: an all-zero struct is returned if PREACTION fails.
+*/
+struct mallinfo public_mALLINFo() {
+  struct mallinfo info = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  if (MALLOC_PREACTION == 0) {
+    info = mALLINFo();
+    (void)(MALLOC_POSTACTION);  /* status ignored */
+  }
+  return info;
+}
+
+/* Guarded mallopt: reports 0 without changing anything if PREACTION fails. */
+int public_mALLOPt(int p, int v) {
+  int ok = 0;
+  if (MALLOC_PREACTION == 0) {
+    ok = mALLOPt(p, v);
+    /* POSTACTION is run for its effect; its status is ignored. */
+    (void)(MALLOC_POSTACTION);
+  }
+  return ok;
+}
+
+#endif
+
+
+
+/* ------------- Optional versions of memcopy ---------------- */
+
+
+#if USE_MEMCPY
+
+/* 
+  Note: memcpy is ONLY invoked with non-overlapping regions,
+  so the (usually slower) memmove is not needed.
+*/
+
+#define MALLOC_COPY(dest, src, nbytes)  memcpy(dest, src, nbytes) /* copy nbytes bytes from src to dest */
+#define MALLOC_ZERO(dest, nbytes)       memset(dest, 0,   nbytes) /* set nbytes bytes at dest to zero */
+
+#else /* !USE_MEMCPY */
+
+/* Use Duff's device for good zeroing/copying performance. */
+
+#define MALLOC_ZERO(charp, nbytes)   /* zero nbytes/sizeof(INTERNAL_SIZE_T) words at charp */ \
+do {                                                                          \
+  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \
+  CHUNK_SIZE_T  mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); /* whole words; leftover bytes are not cleared */ \
+  long mcn;                     /* passes of 8 stores left after the first */ \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {      /* enter the unrolled loop at the remainder count */  \
+    case 0: for(;;) { *mzp++ = 0;  /* also taken when the word count is 0, */ \
+    case 7:           *mzp++ = 0;  /* so a zero-word request still stores  */ \
+    case 6:           *mzp++ = 0;  /* 8 words: pass at least one whole     */ \
+    case 5:           *mzp++ = 0;  /* word.                                */ \
+    case 4:           *mzp++ = 0;                                             \
+    case 3:           *mzp++ = 0;                                             \
+    case 2:           *mzp++ = 0;                                             \
+    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; } /* break leaves the for loop */ \
+  }                                                                           \
+} while(0)
+
+#define MALLOC_COPY(dest,src,nbytes)                                          \
+do {                                                                          \
+  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \
+  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \
+  CHUNK_SIZE_T  mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
+  long mcn;                                                                   \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \
+    case 7:           *mcdst++ = *mcsrc++;                                    \
+    case 6:           *mcdst++ = *mcsrc++;                                    \
+    case 5:           *mcdst++ = *mcsrc++;                                    \
+    case 4:           *mcdst++ = *mcsrc++;                                    \
+    case 3:           *mcdst++ = *mcsrc++;                                    \
+    case 2:           *mcdst++ = *mcsrc++;                                    \
+    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \
+  }                                                                           \
+} while(0)
+
+#endif
+
+/* ------------------ MMAP support ------------------  */
+
+
+#if HAVE_MMAP
+
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#ifndef LACKS_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+/* Accept the MAP_ANON spelling where MAP_ANONYMOUS is not provided. */
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifdef MAP_ANONYMOUS
+
+/* Usual case: anonymous mapping, no backing file descriptor. */
+#define MMAP(addr, size, prot, flags) \
+ (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
+
+#else
+
+/* 
+   Nearly all versions of mmap support MAP_ANONYMOUS, so this
+   fallback is unlikely to be needed, but is supplied just in case.
+   It maps /dev/zero instead; the descriptor is opened on first use
+   (and on every later use for as long as it is still negative) and
+   kept in dev_zero_fd.
+*/
+
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+
+#define MMAP(addr, size, prot, flags) \
+ ((void)((dev_zero_fd < 0) ? \
+         (dev_zero_fd = open("/dev/zero", O_RDWR)) : 0), \
+  mmap((addr), (size), (prot), (flags), dev_zero_fd, 0))
+
+#endif
+
+
+#endif /* HAVE_MMAP */
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
+                                   /* Its low bits also carry the        */
+                                   /* PREV_INUSE and IS_MMAPPED flags.   */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;         /* fd: forward link, bk: back link.   */
+};
+
+
+/* Pointer to a chunk header; the handle used by the chunk macros below. */
+typedef struct malloc_chunk* mchunkptr;
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk, if free                 | |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             User data starts here...                          .
+            .                                                               .
+            .             (malloc_usable_size() bytes)                      .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of chunk                                     |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundaries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk in list             |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk in list            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space (may be 0 bytes long)                .
+            .                                                               .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The two exceptions to all this are
+
+     1. The special chunk `top' doesn't bother using the
+        trailing size field since there is no next contiguous chunk
+        that would have to index off it. After initialization, `top'
+        is forced to always exist.  If it would become less than
+        MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+        bit (IS_MMAPPED) set in their size fields.  Because they are
+        allocated one-by-one, each must contain its own trailing size field.
+
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/*
+  Conversion from malloc headers to user pointers, and back.
+  The user pointer lies 2*SIZE_SZ bytes past the chunk pointer.
+*/
+
+#define chunk2mem(p)   ((Void_t*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (sizeof(struct malloc_chunk))
+
+/*
+  The smallest size we can malloc is an aligned minimal chunk:
+  MIN_CHUNK_SIZE rounded up to a multiple of MALLOC_ALIGN_MASK+1.
+*/
+
+#define MINSIZE  \
+  (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment (no bits of MALLOC_ALIGN_MASK set) */
+
+#define aligned_OK(m)  (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+*/
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((CHUNK_SIZE_T)(req) >=                                        \
+   (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE))
+
+/*
+  Pad request bytes into a usable size -- internal version.
+  Adds SIZE_SZ of overhead, rounds up to the alignment, and never
+  yields less than MINSIZE. Performs no overflow check; req is
+  evaluated more than once.
+*/
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/*
+  Same, except also perform argument check.
+
+  NOTE: this expands to several statements, one of which is
+  "return 0". It can therefore only be used as a statement inside a
+  function for which returning 0 is valid, and must not be used as the
+  unbraced body of an if/else or loop.
+*/
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE(req)) {                                \
+    MALLOC_FAILURE_ACTION;                                        \
+    return 0;                                                     \
+  }                                                               \
+  (sz) = request2size(req);
+
+/*
+  --------------- Physical chunk operations ---------------
+*/
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk (non-zero when it is in use) */
+#define prev_inuse(p)       ((p)->size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk (non-zero when the flag is set) */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+/*
+  Bits to mask off when extracting size 
+
+  Note: IS_MMAPPED is intentionally not masked off from size field in
+  macros for which mmapped chunks should never be seen. This should
+  cause helpful core dumps to occur if it is tried by accident by
+  people extending or adapting this malloc.
+*/
+#define SIZE_BITS (PREV_INUSE|IS_MMAPPED)
+
+/* Get size, ignoring use bits (both PREV_INUSE and IS_MMAPPED) */
+#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
+
+
+/* Ptr to next physical malloc_chunk. Masks off PREV_INUSE only. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) ))
+
+/* Ptr to previous physical malloc_chunk. Reads p's prev_size field. */
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
+
+/* Treat space at ptr + offset as a chunk (s is a byte offset) */
+#define chunk_at_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
+
+/* extract p's inuse bit: it is the PREV_INUSE bit of the next chunk */
+#define inuse(p)\
+((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)\
+((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE
+
+#define clear_inuse(p)\
+((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)
+
+
+/*
+  check/set/clear inuse bits in known places: same as above, but the
+  byte offset s to the following chunk is supplied by the caller
+  instead of being read from p->size
+*/
+#define inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit (only PREV_INUSE kept) */
+#define set_head_size(p, s)  ((p)->size = (((p)->size & PREV_INUSE) | (s)))
+
+/* Set size/use field (overwrites the whole word, flags included) */
+#define set_head(p, s)       ((p)->size = (s))
+
+/*
+  Set size at footer (only when chunk is not in use): stores s in the
+  prev_size field of the chunk located s bytes after p
+*/
+#define set_foot(p, s)       (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
+
+
+/*
+  -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases: 
+   * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. 
+   * If HAVE_MMAP is true, but mmap doesn't support
+     MAP_ANONYMOUS, a dummy file descriptor for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+*/
+
+/*
+  Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.  
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.  
+*/
+
+/* A bin header is addressed as if it were a chunk. */
+typedef struct malloc_chunk* mbinptr;
+
+/*
+  addressing -- note that bin_at(0) does not exist.
+  Bin i owns the pointer pair bins[2*i], bins[2*i+1] of malloc_state m;
+  the result points 2*SIZE_SZ bytes before that pair so that the pair
+  can be accessed through the fd/bk members of a malloc_chunk.
+*/
+#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1)))
+
+/* analog of ++bin: advance by one pointer pair */
+#define next_bin(b)  ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/*
+  Take a chunk off a bin list.
+  P is the chunk to remove; BK and FD are lvalues that receive P's
+  neighbours (P->bk and P->fd), which are then linked to each other.
+  P's own fd/bk fields are left unchanged. The arguments are not
+  parenthesized and the expansion is a bare { } block.
+*/
+#define unlink(P, BK, FD) {                                            \
+  FD = P->fd;                                                          \
+  BK = P->bk;                                                          \
+  FD->bk = BK;                                                         \
+  BK->fd = FD;                                                         \
+}
+
+/*
+  Indexing
+
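+    Bins for sizes < MIN_LARGE_SIZE (256) bytes contain chunks of all the
+    same size, spaced 8 bytes apart. Larger bins are approximately
+    logarithmically spaced (largebin_index() below uses four bins per
+    power of two). NOTE: the table that follows describes a layout with
+    128 bins and a 512-byte small-bin limit; it does not match the
+    NBINS/NSMALLBINS/MIN_LARGE_SIZE values defined below.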
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+*/
+
+#define NBINS              96   /* size of the bin index space          */
+#define NSMALLBINS         32   /* MIN_LARGE_SIZE / SMALLBIN_WIDTH      */
+#define SMALLBIN_WIDTH      8   /* byte spacing between small bins      */
+#define MIN_LARGE_SIZE    256   /* first size handled by largebin_index */
+
+/* True when sz is below MIN_LARGE_SIZE (unsigned comparison) */
+#define in_smallbin_range(sz)  \
+  ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE)
+
+/* Small-bin index is the size divided by 8 */
+#define smallbin_index(sz)     (((unsigned)(sz)) >> 3)
+
+/*
+  Compute index for size. We expect this to be inlined when
+  compiled with optimization, else not, which works out well.
+
+  With x = sz >> 8 and m = position of the highest set bit of x, the
+  result is NSMALLBINS + 4*m + (the two bits of sz just below its
+  leading bit). Sizes with x >= 0x10000 (sz >= 2^24) all map to the
+  last bin, NBINS-1.
+
+  The bsrl path leaves m undefined for x == 0, so sz is expected to be
+  at least 256; bin_index() only calls this for sizes outside the
+  small-bin range. NOTE(review): confirm for any other callers.
+*/
+static int largebin_index(unsigned int sz) {
+  unsigned int  x = sz >> SMALLBIN_WIDTH; 
+  unsigned int m;            /* bit position of highest set bit of x */
+
+  /* Everything this large shares the final bin */
+  if (x >= 0x10000) return NBINS-1;
+
+  /* On intel, use BSRL instruction to find highest bit */
+#if defined(__GNUC__) && defined(i386)
+
+  __asm__("bsrl %1,%0\n\t"
+          : "=r" (m) 
+          : "g"  (x));
+
+#else
+  {
+    /*
+      Based on branch-free nlz algorithm in chapter 5 of Henry
+      S. Warren Jr's book "Hacker's Delight".
+      Portable replacement for the bsrl path above; it is meant to
+      leave the same highest-set-bit position in m.
+    */
+
+    unsigned int n = ((x - 0x100) >> 16) & 8;
+    x <<= n; 
+    m = ((x - 0x1000) >> 16) & 4;
+    n += m; 
+    x <<= m; 
+    m = ((x - 0x4000) >> 16) & 2;
+    n += m; 
+    x = (x << m) >> 14;
+    m = 13 - n + (x & ~(x>>1));
+  }
+#endif
+
+  /* Use next 2 bits to create finer-granularity bins */
+  return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3);
+}
+
+/*
+  Bin index for a chunk size: smallbin_index() for sizes below
+  MIN_LARGE_SIZE, largebin_index() otherwise. sz is evaluated twice.
+*/
+#define bin_index(sz) \
+ ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
+
+/*
+  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
+  first bin that is maintained in sorted order. This must
+  be the smallest size corresponding to a given bin.
+
+  Normally, this should be MIN_LARGE_SIZE. But you can weaken
+  best fit guarantees to sometimes speed up malloc by increasing value.
+  Doing this means that malloc may choose a chunk that is 
+  non-best-fitting by up to the width of the bin.
+
+  Some useful cutoff values:
+      512 - all bins sorted
+     2560 - leaves bins <=     64 bytes wide unsorted  
+    12288 - leaves bins <=    512 bytes wide unsorted
+    65536 - leaves bins <=   4096 bytes wide unsorted
+   262144 - leaves bins <=  32768 bytes wide unsorted
+       -1 - no bins sorted (not recommended!)
+*/
+
+/* Default: sorting starts at the first large bin (MIN_LARGE_SIZE). */
+#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE 
+/* Alternative cutoff, disabled: */
+/* #define FIRST_SORTED_BIN_SIZE 65536 */
+
+/*
+  Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+*/
+
+/*
+  The otherwise unindexable 1-bin is used to hold unsorted chunks.
+  Expands to the header of bin 1 of malloc_state M.
+*/
+#define unsorted_chunks(M)          (bin_at(M, 1))
+
+/*
+  Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sYSMALLOc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+*/
+
+/*
+  Conveniently, the unsorted bin can be used as dummy top on first call.
+  malloc_init_state() stores this value in av->top.
+*/
+#define initial_top(M)              (unsorted_chunks(M))
+
+/*
+  Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+*/
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+/* Map a bin index to its binmap word and to its bit within that word */
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
+
+/* Set, clear and test the binmap bit of bin i in malloc_state m */
+#define mark_bin(m,i)    ((m)->binmap[idx2block(i)] |=  idx2bit(i))
+#define unmark_bin(m,i)  ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
+#define get_binmap(m,i)  ((m)->binmap[idx2block(i)] &   idx2bit(i))
+
+/*
+  Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks. 
+*/
+
+/* Element type of the fastbins array in malloc_state */
+typedef struct malloc_chunk* mfastbinptr;
+
+/*
+  offset 2 to use otherwise unindexable first 2 bins:
+  the index is the chunk size divided by 8, minus 2
+*/
+#define fastbin_index(sz)        ((((unsigned int)(sz)) >> 3) - 2)
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     80
+
+/* Number of fastbins: index of the padded MAX_FAST_SIZE request, plus one */
+#define NFASTBINS  (fastbin_index(request2size(MAX_FAST_SIZE))+1)
+
+/*
+  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+  that triggers automatic consolidation of possibly-surrounding
+  fastbin chunks. This is a heuristic, so the exact value should not
+  matter too much. It is defined at half the default trim threshold as a
+  compromise heuristic to only attempt consolidation if it is likely
+  to lead to trimming. However, it is not dynamically tunable, since
+  consolidation reduces fragmentation surrounding large chunks even
+  if trimming is not used.
+*/
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  \
+  ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
+
+/*
+  Since the lowest 2 bits in max_fast don't matter in size comparisons, 
+  they are used as flags.
+*/
+
+/*
+  ANYCHUNKS_BIT held in max_fast indicates that there may be any
+  freed chunks at all. It is set true when entering a chunk into any
+  bin.
+*/
+
+#define ANYCHUNKS_BIT        (1U)
+
+/* Test, set and clear ANYCHUNKS_BIT in malloc_state M */
+#define have_anychunks(M)     (((M)->max_fast &  ANYCHUNKS_BIT))
+#define set_anychunks(M)      ((M)->max_fast |=  ANYCHUNKS_BIT)
+#define clear_anychunks(M)    ((M)->max_fast &= ~ANYCHUNKS_BIT)
+
+/*
+  FASTCHUNKS_BIT held in max_fast indicates that there are probably
+  some fastbin chunks. It is set true on entering a chunk into any
+  fastbin, and cleared only in malloc_consolidate.
+*/
+
+#define FASTCHUNKS_BIT        (2U)
+
+/*
+  Test, set and clear FASTCHUNKS_BIT in malloc_state M.
+  Note that set_fastchunks also sets ANYCHUNKS_BIT, while
+  clear_fastchunks leaves ANYCHUNKS_BIT untouched.
+*/
+#define have_fastchunks(M)   (((M)->max_fast &  FASTCHUNKS_BIT))
+#define set_fastchunks(M)    ((M)->max_fast |=  (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
+#define clear_fastchunks(M)  ((M)->max_fast &= ~(FASTCHUNKS_BIT))
+
+/*
+   Set value of max_fast. 
+   Use impossibly small value if 0.
+   A non-zero s is converted with request2size(); the two flag bits
+   already stored in max_fast are carried over unchanged.
+*/
+
+#define set_max_fast(M, s) \
+  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
+  ((M)->max_fast &  (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
+
+/* Read max_fast with both flag bits masked off */
+#define get_max_fast(M) \
+  ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
+
+
+/*
+  morecore_properties is a status word holding dynamically discovered
+  or controlled properties of the morecore function
+*/
+
+#define MORECORE_CONTIGUOUS_BIT  (1U)
+
+/* Test the contiguity bit of malloc_state M (set / not set) */
+#define contiguous(M) \
+        (((M)->morecore_properties &  MORECORE_CONTIGUOUS_BIT))
+#define noncontiguous(M) \
+        (((M)->morecore_properties &  MORECORE_CONTIGUOUS_BIT) == 0)
+/* Set or clear the contiguity bit of malloc_state M */
+#define set_contiguous(M) \
+        ((M)->morecore_properties |=  MORECORE_CONTIGUOUS_BIT)
+#define set_noncontiguous(M) \
+        ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+*/
+
+struct malloc_state {
+
+  /* The maximum chunk size to be eligible for fastbin */
+  INTERNAL_SIZE_T  max_fast;   /* low 2 bits used as flags */
+
+  /* Fastbins */
+  mfastbinptr      fastbins[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr        top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr        last_remainder;
+
+  /* Normal bins packed as described above */
+  /* (two pointers per bin: accessed as fd/bk through bin_at()) */
+  mchunkptr        bins[NBINS * 2];
+
+  /* Bitmap of bins. Trailing zero map handles cases of largest binned size */
+  unsigned int     binmap[BINMAPSIZE+1];
+
+  /* Tunable parameters */
+  /* (set to the DEFAULT_* values by malloc_init_state) */
+  CHUNK_SIZE_T     trim_threshold;
+  INTERNAL_SIZE_T  top_pad;
+  INTERNAL_SIZE_T  mmap_threshold;
+
+  /* Memory map support */
+  int              n_mmaps;
+  int              n_mmaps_max;   /* set to DEFAULT_MMAP_MAX at init */
+  int              max_n_mmaps;
+
+  /* Cache malloc_getpagesize */
+  unsigned int     pagesize;    
+
+  /* Track properties of MORECORE */
+  /* (bit MORECORE_CONTIGUOUS_BIT; see contiguous()/set_contiguous()) */
+  unsigned int     morecore_properties;
+
+  /* Statistics */
+  INTERNAL_SIZE_T  mmapped_mem;
+  INTERNAL_SIZE_T  sbrked_mem;
+  INTERNAL_SIZE_T  max_sbrked_mem;
+  INTERNAL_SIZE_T  max_mmapped_mem;
+  INTERNAL_SIZE_T  max_total_mem;
+};
+
+/* Pointer to the allocator state; parameter type of the internal routines */
+typedef struct malloc_state *mstate;
+
+/*
+   There is exactly one instance of this struct in this malloc.
+   If you are adapting this malloc in a way that does NOT use a static
+   malloc_state, you MUST explicitly zero-fill it before using. This
+   malloc relies on the property that malloc_state is initialized to
+   all zeroes (as is true of C statics).
+*/
+
+static struct malloc_state av_;  /* never directly referenced */
+
+/*
+   All uses of av_ are via get_malloc_state().
+   At most one "call" to get_malloc_state is made per invocation of
+   the public versions of malloc and free, but other routines
+   that in turn invoke malloc and/or free may call more than once.
+   Also, it is called in check* routines if DEBUG is set.
+
+   The macro simply yields the address of av_.
+*/
+
+#define get_malloc_state() (&(av_))
+
+/*
+  Initialize a malloc_state struct.
+
+  Links every bin header to itself, loads the tunables with their
+  DEFAULT_* values, records whether MORECORE is contiguous, sets
+  max_fast from DEFAULT_MXFAST, points top at the dummy initial top
+  and caches the page size.  Fields not mentioned here are left as
+  they are.
+
+  This is called only from within malloc_consolidate, which needs to
+  be called in the same contexts anyway.  It is never called directly
+  outside of malloc_consolidate because some optimizing compilers try
+  to inline it at all call points, which turns out not to be an
+  optimization at all. (Inlining it in malloc_consolidate is fine though.)
+*/
+
+#if __STD_C
+static void malloc_init_state(mstate av)
+#else
+static void malloc_init_state(av) mstate av;
+#endif
+{
+  int idx = 1;
+
+  /* An empty bin is a header whose fd and bk both point at itself.
+     Bin 0 does not exist, so start at 1. */
+  while (idx < NBINS) {
+    mbinptr hdr = bin_at(av, idx);
+    hdr->bk = hdr;
+    hdr->fd = hdr;
+    ++idx;
+  }
+
+  /* Tunable parameters start at their compile-time defaults */
+  av->trim_threshold = DEFAULT_TRIM_THRESHOLD;
+  av->mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+  av->n_mmaps_max    = DEFAULT_MMAP_MAX;
+  av->top_pad        = DEFAULT_TOP_PAD;
+
+#if !MORECORE_CONTIGUOUS
+  set_noncontiguous(av);
+#else
+  set_contiguous(av);
+#endif
+
+  set_max_fast(av, DEFAULT_MXFAST);
+
+  /* No real top chunk exists yet; use the dummy one */
+  av->top            = initial_top(av);
+  av->pagesize       = malloc_getpagesize;
+}
+
+/*
+   Other internal utilities operating on mstates.
+   Forward declarations only; prototyped under __STD_C, old-style
+   (parameterless) declarations otherwise.
+*/
+
+#if __STD_C
+static Void_t*  sYSMALLOc(INTERNAL_SIZE_T, mstate);
+static int      sYSTRIm(size_t, mstate);
+static void     malloc_consolidate(mstate);
+static Void_t** iALLOc(size_t, size_t*, int, Void_t**);
+#else
+static Void_t*  sYSMALLOc();
+static int      sYSTRIm();
+static void     malloc_consolidate();
+static Void_t** iALLOc();
+#endif
+
+/*
+  Debugging support
+
+  These routines make a number of assertions about the states
+  of data structures that should be true at all times. If any
+  are not true, it's very likely that a user program has somehow
+  trashed memory. (It's also possible that there is a coding error
+  in malloc. In which case, please report it!)
+*/
+
+#if ! DEBUG
+
+/* Without DEBUG every check expands to nothing */
+#define check_chunk(P)
+#define check_free_chunk(P)
+#define check_inuse_chunk(P)
+#define check_remalloced_chunk(P,N)
+#define check_malloced_chunk(P,N)
+#define check_malloc_state()
+
+#else
+/* With DEBUG each check forwards to the matching do_check_* routine */
+#define check_chunk(P)              do_check_chunk(P)
+#define check_free_chunk(P)         do_check_free_chunk(P)
+#define check_inuse_chunk(P)        do_check_inuse_chunk(P)
+#define check_remalloced_chunk(P,N) do_check_remalloced_chunk(P,N)
+#define check_malloced_chunk(P,N)   do_check_malloced_chunk(P,N)
+#define check_malloc_state()        do_check_malloc_state()
+
+/*
+  Properties of all chunks
+
+  Three cases are distinguished: mmapped chunks, the top chunk, and
+  every other chunk.  Address-range checks are made only while
+  MORECORE is marked contiguous.
+*/
+
+#if __STD_C
+static void do_check_chunk(mchunkptr p)
+#else
+static void do_check_chunk(p) mchunkptr p;
+#endif
+{
+  mstate av = get_malloc_state();
+  CHUNK_SIZE_T  sz = chunksize(p);
+  /* heap bounds, derived from top under the contiguity assumption */
+  char* max_address = (char*)(av->top) + chunksize(av->top);
+  char* min_address = max_address - av->sbrked_mem;
+
+  /* Case 1: chunk obtained via mmap */
+  if (chunk_is_mmapped(p)) {
+#if HAVE_MMAP
+    /* must lie outside the main heap (once a real top exists) */
+    if (contiguous(av) && av->top != initial_top(av)) {
+      assert(((char*)p) < min_address || ((char*)p) > max_address);
+    }
+    /* leading gap plus size covers whole pages */
+    assert(((p->prev_size + sz) & (av->pagesize-1)) == 0);
+    /* user pointer is aligned */
+    assert(aligned_OK(chunk2mem(p)));
+#else
+    /* mmap support is compiled out: fail the assertion on purpose */
+    assert(!chunk_is_mmapped(p));
+#endif
+    return;
+  }
+
+  /* Case 2: the top chunk */
+  if (p == av->top) {
+    /* top never shrinks below MINSIZE ... */
+    assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
+    /* ... and its predecessor is always marked in use */
+    assert(prev_inuse(p));
+    return;
+  }
+
+  /* Case 3: ordinary chunk -- must sit between heap start and top */
+  if (contiguous(av)) {
+    assert(((char*)p) >= min_address);
+    assert(((char*)p + sz) <= ((char*)(av->top)));
+  }
+}
+
+/*
+  Properties of free chunks
+
+  Runs the generic chunk checks first, then verifies that p is marked
+  free and not mmapped.  Chunks smaller than MINSIZE are treated as
+  markers and only their size is checked.
+*/
+
+#if __STD_C
+static void do_check_free_chunk(mchunkptr p)
+#else
+static void do_check_free_chunk(p) mchunkptr p;
+#endif
+{
+  mstate av = get_malloc_state();
+
+  INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
+  mchunkptr next = chunk_at_offset(p, sz);
+
+  do_check_chunk(p);
+
+  /* p has to be marked free, and free chunks are never mmapped */
+  assert(!inuse(p));
+  assert (!chunk_is_mmapped(p));
+
+  /* Special markers: always exactly SIZE_SZ, nothing else to verify */
+  if ((CHUNK_SIZE_T)(sz) < MINSIZE) {
+    assert(sz == SIZE_SZ);
+    return;
+  }
+
+  /* Regular free chunk: size and user pointer are aligned */
+  assert((sz & MALLOC_ALIGN_MASK) == 0);
+  assert(aligned_OK(chunk2mem(p)));
+  /* footer (prev_size of the next chunk) mirrors the size */
+  assert(next->prev_size == sz);
+  /* both physical neighbours are in use, i.e. fully consolidated */
+  assert(prev_inuse(p));
+  assert (next == av->top || inuse(next));
+
+  /* list neighbours point back at p */
+  assert(p->fd->bk == p);
+  assert(p->bk->fd == p);
+}
+
+/*
+  Properties of inuse chunks
+
+  After the generic checks, verifies that p is marked in use and then
+  inspects its physical neighbours: a free predecessor or successor
+  gets the full free-chunk check, and a successor that is the top
+  chunk must satisfy top's invariants.
+*/
+
+#if __STD_C
+static void do_check_inuse_chunk(mchunkptr p)
+#else
+static void do_check_inuse_chunk(p) mchunkptr p;
+#endif
+{
+  mstate av = get_malloc_state();
+  mchunkptr next;
+  do_check_chunk(p);
+
+  /* mmapped chunks have no next/prev, so there is nothing more to do */
+  if (chunk_is_mmapped(p))
+    return;
+
+  /* p must claim to be in use */
+  assert(inuse(p));
+
+  next = next_chunk(p);
+
+  /* The predecessor may only be looked at when it is not in use */
+  if (prev_inuse(p) == 0) {
+    mchunkptr prv = prev_chunk(p);
+    assert(next_chunk(prv) == p);
+    do_check_free_chunk(prv);
+  }
+
+  if (next != av->top) {
+    /* a free successor is worth checking in full */
+    if (!inuse(next))
+      do_check_free_chunk(next);
+  }
+  else {
+    /* successor is top: p counts as in use and top is big enough */
+    assert(prev_inuse(next));
+    assert(chunksize(next) >= MINSIZE);
+  }
+}
+
+/*
+  Properties of chunks recycled from fastbins
+*/
+
+#if __STD_C
+static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
+#else
+static void do_check_remalloced_chunk(p, s) mchunkptr p; INTERNAL_SIZE_T s;
+#endif
+{
+  /* p: chunk being handed out; s: the request it has to satisfy */
+  INTERNAL_SIZE_T actual = p->size & ~PREV_INUSE;
+
+  do_check_inuse_chunk(p);
+  /* Size is a multiple of the alignment and not below the minimum */
+  assert((actual & MALLOC_ALIGN_MASK) == 0);
+  assert((CHUNK_SIZE_T)(actual) >= MINSIZE);
+  /* User pointer is aligned as well */
+  assert(aligned_OK(chunk2mem(p)));
+  /* Chunk covers the request with less than MINSIZE to spare */
+  assert((long)(actual) - (long)(s) >= 0);
+  assert((long)(actual) - (long)(s + MINSIZE) < 0);
+}
+
+/*
+  Properties of nonrecycled chunks at the point they are malloced
+*/
+
+#if __STD_C
+static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
+#else
+static void do_check_malloced_chunk(p, s) mchunkptr p; INTERNAL_SIZE_T s;
+#endif
+{
+  /* p: chunk just allocated; s: its request.  Same checks as recycled case ... */
+  do_check_remalloced_chunk(p, s);
+
+  /*
+    ... plus, it must obey the implementation invariant that prev_inuse
+    is always true of any allocated chunk; i.e., that each allocated
+    chunk borders either a previously allocated and still in-use
+    chunk, or the base of its memory arena. This is ensured
+    by making all allocations from the `lowest' part of any found
+    chunk.  This does not necessarily hold however for chunks
+    recycled via fastbins.
+  */
+
+  assert(prev_inuse(p));
+}
+
+
+/*
+  Properties of malloc_state.
+
+  This may be useful for debugging malloc, as well as detecting user
+  programmer errors that somehow write into malloc_state.
+
+  If you are extending or experimenting with this malloc, you can
+  probably figure out how to hack this routine to print out or
+  display chunk addresses, sizes, bins, and other instrumentation.
+*/
+
+static void do_check_malloc_state()
+{
+  mstate av = get_malloc_state();
+  int i;                    /* index of the bin currently examined */
+  mchunkptr p;              /* chunk currently examined */
+  mchunkptr q;              /* walks the inuse chunks that follow p */
+  mbinptr b;                /* header of the current normal bin */
+  unsigned int binbit;      /* binmap bit of the current bin */
+  int empty;                /* nonzero when the current bin holds no chunk */
+  unsigned int idx;         /* bin index derived from a chunk's size */
+  INTERNAL_SIZE_T size;     /* size of the chunk currently examined */
+  CHUNK_SIZE_T  total = 0;  /* summed size of every chunk found in a bin */
+  int max_fast_bin;         /* fastbin index that corresponds to max_fast */
+  /* Walk the fastbins, the normal bins and top, asserting each invariant. */
+  /* internal size_t must be no wider than pointer type */
+  assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*));
+
+  /* alignment is a power of 2 */
+  assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top(av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert((av->pagesize & (av->pagesize-1)) == 0);
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index(av->max_fast);
+
+  for (i = 0; i < NFASTBINS; ++i) {
+    p = av->fastbins[i];
+
+    /* all bins past max_fast are empty */
+    if (i > max_fast_bin)
+      assert(p == 0);
+
+    while (p != 0) {   /* follow the fd links until the list ends */
+      /* each chunk claims to be inuse */
+      do_check_inuse_chunk(p);
+      total += chunksize(p);
+      /* chunk belongs in this bin */
+      assert(fastbin_index(chunksize(p)) == i);
+      p = p->fd;
+    }
+  }
+  /* chunks found in fastbins imply that the fastchunks flag is set */
+  if (total != 0)
+    assert(have_fastchunks(av));
+  else if (!have_fastchunks(av))
+    assert(total == 0);   /* trivially true: total is 0 on this path */
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i) {
+    b = bin_at(av,i);
+
+    /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+    if (i >= 2) {
+      binbit = get_binmap(av,i);
+      empty = last(b) == b;
+      if (!binbit)
+        assert(empty);    /* a clear bit requires an empty bin */
+      else if (!empty)
+        assert(binbit);   /* trivially true: binbit is nonzero on this path */
+    }
+
+    for (p = last(b); p != b; p = p->bk) {   /* from last(b) along bk links */
+      /* each chunk claims to be free */
+      do_check_free_chunk(p);
+      size = chunksize(p);
+      total += size;
+      if (i >= 2) {
+        /* chunk belongs in bin */
+        idx = bin_index(size);
+        assert(idx == i);
+        /* lists are sorted: sizes never decrease along the bk links */
+        if ((CHUNK_SIZE_T) size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
+          assert(p->bk == b || 
+                 (CHUNK_SIZE_T)chunksize(p->bk) >= 
+                 (CHUNK_SIZE_T)chunksize(p));
+        }
+      }
+      /* chunk is followed by a legal chain of inuse chunks */
+      for (q = next_chunk(p);
+           (q != av->top && inuse(q) && 
+             (CHUNK_SIZE_T)(chunksize(q)) >= MINSIZE);
+           q = next_chunk(q))
+        do_check_inuse_chunk(q);
+    }
+  }
+
+  /* top chunk is OK */
+  check_chunk(av->top);
+
+  /* sanity checks for statistics */
+
+  assert(total <= (CHUNK_SIZE_T)(av->max_total_mem));
+  assert(av->n_mmaps >= 0);
+  assert(av->n_mmaps <= av->max_n_mmaps);
+
+  assert((CHUNK_SIZE_T)(av->sbrked_mem) <=
+         (CHUNK_SIZE_T)(av->max_sbrked_mem));
+
+  assert((CHUNK_SIZE_T)(av->mmapped_mem) <=
+         (CHUNK_SIZE_T)(av->max_mmapped_mem));
+
+  assert((CHUNK_SIZE_T)(av->max_total_mem) >=
+         (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem));
+}
+#endif
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+  sysmalloc handles malloc cases requiring more memory from the system.
+  On entry, it is assumed that av->top does not have enough
+  space to service request for nb bytes, thus requiring that av->top
+  be extended or replaced.
+*/
+
+#if __STD_C
+static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av)
+#else
+static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av;
+#endif
+{
+  mchunkptr       old_top;        /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char*           old_end;        /* its end address */
+
+  long            size;           /* arg to first MORECORE or mmap call */
+  char*           brk;            /* return value from MORECORE */
+
+  long            correction;     /* arg to 2nd MORECORE call */
+  char*           snd_brk;        /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char*           aligned_brk;    /* aligned offset into brk */
+
+  mchunkptr       p;              /* the allocated/returned chunk */
+  mchunkptr       remainder;      /* remainder from allocation */
+  CHUNK_SIZE_T    remainder_size; /* its size */
+
+  CHUNK_SIZE_T    sum;            /* for updating stats */
+
+  size_t          pagemask  = av->pagesize - 1;
+
+  /*
+    nb is the normalized request size and av the malloc state; the
+    result is user memory, or 0 after MALLOC_FAILURE_ACTION.
+
+    If there is space available in fastbins, consolidate and retry
+    malloc from scratch rather than getting memory from system.  This
+    can occur only if nb is in smallbin range so we didn't consolidate
+    upon entry to malloc. It is much easier to handle this case here
+    than in malloc proper.
+  */
+
+  if (have_fastchunks(av)) {
+    assert(in_smallbin_range(nb));
+    malloc_consolidate(av);
+    return mALLOc(nb - MALLOC_ALIGN_MASK);
+  }
+
+#if HAVE_MMAP
+
+  /*
+    If have mmap, and the request size meets the mmap threshold, and
+    the system supports mmap, and there are few enough currently
+    allocated mmapped regions, try to directly map this request
+    rather than expanding top.
+  */
+
+  if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) &&
+      (av->n_mmaps < av->n_mmaps_max)) {
+
+    char* mm;             /* return value from mmap call */
+
+    /*
+      Round up size to nearest page.  For mmapped chunks, the overhead
+      is one SIZE_SZ unit larger than for normal chunks, because there
+      is no following chunk whose prev_size field could be used.
+    */
+    size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
+
+    /* Don't try if size wraps around 0 */
+    if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
+
+      mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
+
+      if (mm != (char*)(MORECORE_FAILURE)) {
+
+        /*
+          The offset to the start of the mmapped region is stored
+          in the prev_size field of the chunk. This allows us to adjust
+          returned start address to meet alignment requirements here
+          and in memalign(), and still be able to compute proper
+          address argument for later munmap in free() and realloc().
+        */
+
+        front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
+        if (front_misalign > 0) {
+          correction = MALLOC_ALIGNMENT - front_misalign;
+          p = (mchunkptr)(mm + correction);
+          p->prev_size = correction;
+          set_head(p, (size - correction) |IS_MMAPPED);
+        }
+        else {
+          p = (mchunkptr)mm;
+          p->prev_size = 0;
+          set_head(p, size|IS_MMAPPED);
+        }
+
+        /* update statistics */
+        if (++av->n_mmaps > av->max_n_mmaps)
+          av->max_n_mmaps = av->n_mmaps;
+
+        sum = av->mmapped_mem += size;
+        if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem))
+          av->max_mmapped_mem = sum;
+        sum += av->sbrked_mem;
+        if (sum > (CHUNK_SIZE_T)(av->max_total_mem))
+          av->max_total_mem = sum;
+
+        check_chunk(p);
+
+        return chunk2mem(p);
+      }
+    }
+  }
+#endif
+
+  /* Record incoming configuration of top */
+
+  old_top  = av->top;
+  old_size = chunksize(old_top);
+  old_end  = (char*)(chunk_at_offset(old_top, old_size));
+
+  brk = snd_brk = (char*)(MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+  */
+
+  assert((old_top == initial_top(av) && old_size == 0) ||
+         ((CHUNK_SIZE_T) (old_size) >= MINSIZE &&
+          prev_inuse(old_top)));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE));
+
+  /* Precondition: all fastbins are consolidated */
+  assert(!have_fastchunks(av));
+
+  /* Request enough space for nb + pad + overhead */
+
+  size = nb + av->top_pad + MINSIZE;
+
+  /*
+    If contiguous, we can subtract out existing space that we hope to
+    combine with new space. We add it back later only if
+    we don't actually get contiguous space.
+  */
+
+  if (contiguous(av))
+    size -= old_size;
+
+  /*
+    Round to a multiple of page size.
+    If MORECORE is not contiguous, this ensures that we only call it
+    with whole-page arguments.  And if MORECORE is contiguous and
+    this is not first time through, this preserves page-alignment of
+    previous calls. Otherwise, we correct to page-align below.
+  */
+
+  size = (size + pagemask) & ~pagemask;
+
+  /*
+    Don't try to call MORECORE if argument is so big as to appear
+    negative. Note that since mmap takes size_t arg, it may succeed
+    below even if we cannot call MORECORE.
+  */
+
+  if (size > 0)
+    brk = (char*)(MORECORE(size));
+
+  /*
+    If have mmap, try using it as a backup when MORECORE fails or
+    cannot be used. This is worth doing on systems that have "holes" in
+    address space, so sbrk cannot extend to give contiguous space, but
+    space is available elsewhere.  Note that we ignore mmap max count
+    and threshold limits, since the space will not be used as a
+    segregated mmap region.
+  */
+
+#if HAVE_MMAP
+  if (brk == (char*)(MORECORE_FAILURE)) {
+
+    /* Cannot merge with old top, so add its size back in */
+    if (contiguous(av))
+      size = (size + old_size + pagemask) & ~pagemask;
+
+    /* If we are relying on mmap as backup, then use larger units */
+    if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE))
+      size = MMAP_AS_MORECORE_SIZE;
+
+    /* Don't try if size wraps around 0 */
+    if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
+
+      brk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
+
+      if (brk != (char*)(MORECORE_FAILURE)) {
+
+        /* We do not need, and cannot use, another sbrk call to find end */
+        snd_brk = brk + size;
+
+        /*
+           Record that we no longer have a contiguous sbrk region.
+           After the first time mmap is used as backup, we do not
+           ever rely on contiguous space since this could incorrectly
+           bridge regions.
+        */
+        set_noncontiguous(av);
+      }
+    }
+  }
+#endif
+
+  if (brk != (char*)(MORECORE_FAILURE)) {
+    av->sbrked_mem += size;
+
+    /*
+      If MORECORE extends previous space, we can likewise extend top size.
+    */
+
+    if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE)) {
+      set_head(old_top, (size + old_size) | PREV_INUSE);
+    }
+
+    /*
+      Otherwise, make adjustments:
+
+      * If the first time through or noncontiguous, we need to call sbrk
+        just to find out where the end of memory lies.
+
+      * We need to ensure that all returned chunks from malloc will meet
+        MALLOC_ALIGNMENT
+
+      * If there was an intervening foreign sbrk, we need to adjust sbrk
+        request size to account for fact that we will not be able to
+        combine new space with existing space in old_top.
+
+      * Almost all systems internally allocate whole pages at a time, in
+        which case we might as well use the whole last page of request.
+        So we allocate enough more memory to hit a page boundary now,
+        which in turn causes future contiguous calls to page-align.
+    */
+
+    else {
+      front_misalign = 0;
+      end_misalign = 0;
+      correction = 0;
+      aligned_brk = brk;
+
+      /*
+        If MORECORE returns an address lower than we have seen before,
+        we know it isn't really contiguous.  This and some subsequent
+        checks help cope with non-conforming MORECORE functions and
+        the presence of "foreign" calls to MORECORE from outside of
+        malloc or by other threads.  We cannot guarantee to detect
+        these in all cases, but cope with the ones we do detect.
+      */
+      if (contiguous(av) && old_size != 0 && brk < old_end) {
+        set_noncontiguous(av);
+      }
+
+      /* handle contiguous cases */
+      if (contiguous(av)) {
+
+        /*
+           We can tolerate forward non-contiguities here (usually due
+           to foreign calls) but treat them as part of our space for
+           stats reporting.
+        */
+        if (old_size != 0)
+          av->sbrked_mem += brk - old_end;
+
+        /* Guarantee alignment of first new chunk made from this space */
+
+        front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+        if (front_misalign > 0) {
+
+          /*
+            Skip over some bytes to arrive at an aligned position.
+            We don't need to specially mark these wasted front bytes.
+            They will never be accessed anyway because
+            prev_inuse of av->top (and any chunk created from its start)
+            is always true after initialization.
+          */
+
+          correction = MALLOC_ALIGNMENT - front_misalign;
+          aligned_brk += correction;
+        }
+
+        /*
+          If this isn't adjacent to existing space, then we will not
+          be able to merge with old_top space, so must add to 2nd request.
+        */
+
+        correction += old_size;
+
+        /* Extend the end address to hit a page boundary */
+        end_misalign = (INTERNAL_SIZE_T)(brk + size + correction);
+        correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign;
+
+        assert(correction >= 0);
+        snd_brk = (char*)(MORECORE(correction));
+
+        if (snd_brk == (char*)(MORECORE_FAILURE)) {
+          /*
+            If can't allocate correction, try to at least find out current
+            brk.  It might be enough to proceed without failing.
+          */
+          correction = 0;
+          snd_brk = (char*)(MORECORE(0));
+        }
+        else if (snd_brk < brk) {
+          /*
+            If the second call gives noncontiguous space even though
+            it says it won't, the only course of action is to ignore
+            results of second call, and conservatively estimate where
+            the first call left us. Also set noncontiguous, so this
+            won't happen again, leaving at most one hole.
+
+            Note that this check is intrinsically incomplete.  Because
+            MORECORE is allowed to give more space than we ask for,
+            there is no reliable way to detect a noncontiguity
+            producing a forward gap for the second call.
+          */
+          snd_brk = brk + size;
+          correction = 0;
+          set_noncontiguous(av);
+        }
+      }
+
+      /* handle non-contiguous cases */
+      else {
+        /* MORECORE/mmap must correctly align */
+        assert(aligned_OK(chunk2mem(brk)));
+
+        /* Find out current end of memory */
+        if (snd_brk == (char*)(MORECORE_FAILURE)) {
+          snd_brk = (char*)(MORECORE(0));
+          /* A failed probe is no address: keep it out of the statistics */
+          if (snd_brk != (char*)(MORECORE_FAILURE))
+            av->sbrked_mem += snd_brk - brk - size;
+        }
+      }
+
+      /* Adjust top based on results of second sbrk */
+      if (snd_brk != (char*)(MORECORE_FAILURE)) {
+        av->top = (mchunkptr)aligned_brk;
+        set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+        av->sbrked_mem += correction;
+
+        /*
+          If not the first time through, we either have a
+          gap due to foreign sbrk or a non-contiguous region.  Insert a
+          double fencepost at old_top to prevent consolidation with space
+          we don't own. These fenceposts are artificial chunks that are
+          marked as inuse and are in any case too small to use.  We need
+          two to make sizes and alignments work out.
+        */
+
+        if (old_size != 0) {
+          /*
+             Shrink old_top to insert fenceposts, keeping size a
+             multiple of MALLOC_ALIGNMENT. We know there is at least
+             enough space in old_top to do this.
+          */
+          old_size = (old_size - 3*SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+          set_head(old_top, old_size | PREV_INUSE);
+
+          /*
+            Note that the following assignments completely overwrite
+            old_top when old_size was previously MINSIZE.  This is
+            intentional. We need the fencepost, even if old_top otherwise gets
+            lost.
+          */
+          chunk_at_offset(old_top, old_size          )->size =
+            SIZE_SZ|PREV_INUSE;
+
+          chunk_at_offset(old_top, old_size + SIZE_SZ)->size =
+            SIZE_SZ|PREV_INUSE;
+
+          /*
+             If possible, release the rest, suppressing trimming.
+          */
+          if (old_size >= MINSIZE) {
+            INTERNAL_SIZE_T tt = av->trim_threshold;
+            av->trim_threshold = (INTERNAL_SIZE_T)(-1);
+            fREe(chunk2mem(old_top));
+            av->trim_threshold = tt;
+          }
+        }
+      }
+    }
+
+    /* Update statistics */
+    sum = av->sbrked_mem;
+    if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem))
+      av->max_sbrked_mem = sum;
+
+    sum += av->mmapped_mem;
+    if (sum > (CHUNK_SIZE_T)(av->max_total_mem))
+      av->max_total_mem = sum;
+
+    check_malloc_state();
+
+    /* finally, do the allocation */
+
+    p = av->top;
+    size = chunksize(p);
+
+    /* check that one of the above allocation paths succeeded */
+    if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset(p, nb);
+      av->top = remainder;
+      set_head(p, nb | PREV_INUSE);
+      set_head(remainder, remainder_size | PREV_INUSE);
+      check_malloced_chunk(p, nb);
+      return chunk2mem(p);
+    }
+  }
+
+  /* catch all failure paths */
+  MALLOC_FAILURE_ACTION;
+  return 0;
+}
+
+
+
+
+/*
+  sYSTRIm is an inverse of sorts to sYSMALLOc.  It gives memory back
+  to the system (via negative arguments to sbrk) if there is unused
+  memory at the `high' end of the malloc pool. It is called
+  automatically by free() when top space exceeds the trim
+  threshold. It is also called by the public malloc_trim routine.  It
+  returns 1 if it actually released any memory, else 0.
+*/
+
+#if __STD_C
+static int sYSTRIm(size_t pad, mstate av)
+#else
+static int sYSTRIm(pad, av) size_t pad; mstate av;
+#endif
+{
+  /* pad is left in top in addition to MINSIZE; the result is 1 when
+     memory really went back to the system and 0 otherwise. */
+  size_t page;           /* page size recorded in av */
+  long   top_bytes;      /* current size of the top chunk */
+  long   surplus;        /* bytes we would like to hand back */
+  long   freed;          /* bytes that really went back */
+  char*  brk_before;     /* MORECORE(0) before shrinking */
+  char*  brk_after;      /* MORECORE(0) after shrinking */
+
+  page      = av->pagesize;
+  top_bytes = chunksize(av->top);
+
+  /*
+    Work in whole pages and always keep at least one page; nothing
+    to do unless a positive amount remains.
+  */
+  surplus = ((top_bytes - pad - MINSIZE + (page-1)) / page - 1) * page;
+  if (surplus <= 0)
+    return 0;
+
+  /*
+    An end of memory that is not where we last set it points to
+    foreign sbrk calls; leave everything alone in that case.
+  */
+  brk_before = (char*)(MORECORE(0));
+  if (brk_before != (char*)(av->top) + top_bytes)
+    return 0;
+
+  /*
+    Ask for the release but ignore the return value, and query the
+    end of memory again instead.  That copes with a call that released
+    less than requested, or with a failure that somehow moved brk.  (If
+    brk was altered in some very bad way all we can do is adjust
+    anyway, which will cause some downstream failure.)
+  */
+  MORECORE(-surplus);
+  brk_after = (char*)(MORECORE(0));
+  if (brk_after == (char*)MORECORE_FAILURE)
+    return 0;
+
+  freed = (long)(brk_before - brk_after);
+  if (freed == 0)
+    return 0;
+
+  /* Something was released: shrink the statistics and top accordingly */
+  av->sbrked_mem -= freed;
+  set_head(av->top, (top_bytes - freed) | PREV_INUSE);
+  check_malloc_state();
+  return 1;
+}
+
+/*
+  ------------------------------ malloc ------------------------------
+*/
+
+
+#if __STD_C
+Void_t* mALLOc(size_t bytes)
+#else
+  Void_t* mALLOc(bytes) size_t bytes;
+#endif
+{
+  mstate av = get_malloc_state();
+
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int    idx;              /* associated bin index */
+  mbinptr         bin;              /* associated bin */
+  mfastbinptr*    fb;               /* associated fastbin */
+
+  mchunkptr       victim;           /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int             victim_index;     /* its bin index */
+
+  mchunkptr       remainder;        /* remainder from a split */
+  CHUNK_SIZE_T    remainder_size;   /* its size */
+
+  unsigned int    block;            /* bit map traverser */
+  unsigned int    bit;              /* bit map traverser */
+  unsigned int    map;              /* current word of binmap */
+
+  mchunkptr       fwd;              /* misc temp for linking */
+  mchunkptr       bck;              /* misc temp for linking */
+
+  /*
+    Convert request size to internal form by adding SIZE_SZ bytes
+    overhead plus possibly more to obtain necessary alignment and/or
+    to obtain a size of at least MINSIZE, the smallest allocatable
+    size. Also, checked_request2size traps (returning 0) request sizes
+    that are so large that they wrap around zero when padded and
+    aligned.
+  */
+
+  checked_request2size(bytes, nb);
+
+  /*
+    Bypass search if no frees yet
+   */
+  if (!have_anychunks(av)) {
+    if (av->max_fast == 0) /* initialization check */
+      malloc_consolidate(av);
+    goto use_top;
+  }
+
+  /*
+    If the size qualifies as a fastbin, first check corresponding bin.
+  */
+
+  if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) { 
+    fb = &(av->fastbins[(fastbin_index(nb))]);
+    if ( (victim = *fb) != 0) {
+      *fb = victim->fd;
+      check_remalloced_chunk(victim, nb);
+      return chunk2mem(victim);
+    }
+  }
+
+  /*
+    If a small request, check regular bin.  Since these "smallbins"
+    hold one size each, no searching within bins is necessary.
+    (For a large request, we need to wait until unsorted chunks are
+    processed to find best fit. But for small ones, fits are exact
+    anyway, so we can check now, which is faster.)
+  */
+
+  if (in_smallbin_range(nb)) {
+    idx = smallbin_index(nb);
+    bin = bin_at(av,idx);
+
+    if ( (victim = last(bin)) != bin) {
+      bck = victim->bk;
+      set_inuse_bit_at_offset(victim, nb);
+      bin->bk = bck;
+      bck->fd = bin;
+      
+      check_malloced_chunk(victim, nb);
+      return chunk2mem(victim);
+    }
+  }
+
+  /* 
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not 
+     invoked all that often in most programs. And the programs that
+     it is called frequently in otherwise tend to fragment.
+  */
+
+  else {
+    idx = largebin_index(nb);
+    if (have_fastchunks(av)) 
+      malloc_consolidate(av);
+  }
+
+  /*
+    Process recently freed or remaindered chunks, taking one only if
+    it is exact fit, or, if this a small request, the chunk is remainder from
+    the most recent non-exact fit.  Place other traversed chunks in
+    bins.  Note that this step is the only place in any routine where
+    chunks are placed in bins.
+  */
+    
+  while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
+    bck = victim->bk;
+    size = chunksize(victim);
+    
+    /* 
+       If a small request, try to use last remainder if it is the
+       only chunk in unsorted bin.  This helps promote locality for
+       runs of consecutive small requests. This is the only
+       exception to best-fit, and applies only when there is
+       no exact fit for a small chunk.
+    */
+    
+    if (in_smallbin_range(nb) && 
+        bck == unsorted_chunks(av) &&
+        victim == av->last_remainder &&
+        (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) {
+      
+      /* split and reattach remainder */
+      remainder_size = size - nb;
+      remainder = chunk_at_offset(victim, nb);
+      unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+      av->last_remainder = remainder; 
+      remainder->bk = remainder->fd = unsorted_chunks(av);
+      
+      set_head(victim, nb | PREV_INUSE);
+      set_head(remainder, remainder_size | PREV_INUSE);
+      set_foot(remainder, remainder_size);
+      
+      check_malloced_chunk(victim, nb);
+      return chunk2mem(victim);
+    }
+    
+    /* remove from unsorted list */
+    unsorted_chunks(av)->bk = bck;
+    bck->fd = unsorted_chunks(av);
+    
+    /* Take now instead of binning if exact fit */
+    
+    if (size == nb) {
+      set_inuse_bit_at_offset(victim, size);
+      check_malloced_chunk(victim, nb);
+      return chunk2mem(victim);
+    }
+    
+    /* place chunk in bin */
+    
+    if (in_smallbin_range(size)) {
+      victim_index = smallbin_index(size);
+      bck = bin_at(av, victim_index);
+      fwd = bck->fd;
+    }
+    else {
+      victim_index = largebin_index(size);
+      bck = bin_at(av, victim_index);
+      fwd = bck->fd;
+      
+      if (fwd != bck) {
+        /* if smaller than smallest, place first */
+        if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) {
+          fwd = bck;
+          bck = bck->bk;
+        }
+        else if ((CHUNK_SIZE_T)(size) >= 
+                 (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
+          
+          /* maintain large bins in sorted order */
+          size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */
+          while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) 
+            fwd = fwd->fd;
+          bck = fwd->bk;
+        }
+      }
+    }
+      
+    mark_bin(av, victim_index);
+    victim->bk = bck;
+    victim->fd = fwd;
+    fwd->bk = victim;
+    bck->fd = victim;
+  }
+  
+  /*
+    If a large request, scan through the chunks of current bin to
+    find one that fits.  (This will be the smallest that fits unless
+    FIRST_SORTED_BIN_SIZE has been changed from default.)  This is
+    the only step where an unbounded number of chunks might be
+    scanned without doing anything useful with them. However the
+    lists tend to be short.
+  */
+  
+  if (!in_smallbin_range(nb)) {
+    bin = bin_at(av, idx);
+    
+    for (victim = last(bin); victim != bin; victim = victim->bk) {
+      size = chunksize(victim);
+      
+      if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) {
+        remainder_size = size - nb;
+        unlink(victim, bck, fwd);
+        
+        /* Exhaust */
+        if (remainder_size < MINSIZE)  {
+          set_inuse_bit_at_offset(victim, size);
+          check_malloced_chunk(victim, nb);
+          return chunk2mem(victim);
+        }
+        /* Split */
+        else {
+          remainder = chunk_at_offset(victim, nb);
+          unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+          remainder->bk = remainder->fd = unsorted_chunks(av);
+          set_head(victim, nb | PREV_INUSE);
+          set_head(remainder, remainder_size | PREV_INUSE);
+          set_foot(remainder, remainder_size);
+          check_malloced_chunk(victim, nb);
+          return chunk2mem(victim);
+        } 
+      }
+    }    
+  }
+
+  /*
+    Search for a chunk by scanning bins, starting with next largest
+    bin. This search is strictly by best-fit; i.e., the smallest
+    (with ties going to approximately the least recently used) chunk
+    that fits is selected.
+    
+    The bitmap avoids needing to check that most blocks are nonempty.
+  */
+    
+  ++idx;
+  bin = bin_at(av,idx);
+  block = idx2block(idx);
+  map = av->binmap[block];
+  bit = idx2bit(idx);
+  
+  for (;;) {
+    
+    /* Skip rest of block if there are no more set bits in this block.  */
+    if (bit > map || bit == 0) {
+      do {
+        if (++block >= BINMAPSIZE)  /* out of bins */
+          goto use_top;
+      } while ( (map = av->binmap[block]) == 0);
+      
+      bin = bin_at(av, (block << BINMAPSHIFT));
+      bit = 1;
+    }
+    
+    /* Advance to bin with set bit. There must be one. */
+    while ((bit & map) == 0) {
+      bin = next_bin(bin);
+      bit <<= 1;
+      assert(bit != 0);
+    }
+    
+    /* Inspect the bin. It is likely to be non-empty */
+    victim = last(bin);
+    
+    /*  If a false alarm (empty bin), clear the bit. */
+    if (victim == bin) {
+      av->binmap[block] = map &= ~bit; /* Write through */
+      bin = next_bin(bin);
+      bit <<= 1;
+    }
+    
+    else {
+      size = chunksize(victim);
+      
+      /*  We know the first chunk in this bin is big enough to use. */
+      assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb));
+      
+      remainder_size = size - nb;
+      
+      /* unlink */
+      bck = victim->bk;
+      bin->bk = bck;
+      bck->fd = bin;
+      
+      /* Exhaust */
+      if (remainder_size < MINSIZE) {
+        set_inuse_bit_at_offset(victim, size);
+        check_malloced_chunk(victim, nb);
+        return chunk2mem(victim);
+      }
+      
+      /* Split */
+      else {
+        remainder = chunk_at_offset(victim, nb);
+        
+        unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+        remainder->bk = remainder->fd = unsorted_chunks(av);
+        /* advertise as last remainder */
+        if (in_smallbin_range(nb)) 
+          av->last_remainder = remainder; 
+        
+        set_head(victim, nb | PREV_INUSE);
+        set_head(remainder, remainder_size | PREV_INUSE);
+        set_foot(remainder, remainder_size);
+        check_malloced_chunk(victim, nb);
+        return chunk2mem(victim);
+      }
+    }
+  }
+
+  use_top:    
+  /*
+    If large enough, split off the chunk bordering the end of memory
+    (held in av->top). Note that this is in accord with the best-fit
+    search rule.  In effect, av->top is treated as larger (and thus
+    less well fitting) than any other available chunk since it can
+    be extended to be as large as necessary (up to system
+    limitations).
+    
+    We require that av->top always exists (i.e., has size >=
+    MINSIZE) after initialization, so if it would otherwise be
+    exhausted by current request, it is replenished. (The main
+    reason for ensuring it exists is that we may need MINSIZE space
+    to put in fenceposts in sysmalloc.)
+  */
+  
+  victim = av->top;
+  size = chunksize(victim);
+  
+  if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
+    remainder_size = size - nb;
+    remainder = chunk_at_offset(victim, nb);
+    av->top = remainder;
+    set_head(victim, nb | PREV_INUSE);
+    set_head(remainder, remainder_size | PREV_INUSE);
+    
+    check_malloced_chunk(victim, nb);
+    return chunk2mem(victim);
+  }
+  
+  /* 
+     If no space in top, relay to handle system-dependent cases 
+  */
+  return sYSMALLOc(nb, av);    
+}
+
+/*
+  ------------------------------ free ------------------------------
+*/
+/* fREe(mem): hand the chunk behind user pointer mem back to the allocator; a null mem is ignored. */
+#if __STD_C
+void fREe(Void_t* mem)
+#else
+void fREe(mem) Void_t* mem;
+#endif
+{
+  mstate av = get_malloc_state();
+  /* Outcomes below: push on a fastbin, coalesce into the unsorted list or top, or munmap. */
+  mchunkptr       p;           /* chunk corresponding to mem */
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr*    fb;          /* associated fastbin */
+  mchunkptr       nextchunk;   /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int             nextinuse;   /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr       bck;         /* misc temp for linking */
+  mchunkptr       fwd;         /* misc temp for linking */
+
+  /* free(0) has no effect */
+  if (mem != 0) {
+    p = mem2chunk(mem);
+    size = chunksize(p);
+
+    check_inuse_chunk(p);
+
+    /*
+      If eligible, place chunk on a fastbin so it can be found
+      and used quickly in malloc.
+    */
+
+    if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast)
+
+#if TRIM_FASTBINS
+        /* 
+           If TRIM_FASTBINS set, don't place chunks
+           bordering top into fastbins
+        */
+        && (chunk_at_offset(p, size) != av->top)
+#endif
+        ) {
+      /* Push p on the front of the list for its size class; only fd is written, no coalescing. */
+      set_fastchunks(av);
+      fb = &(av->fastbins[fastbin_index(size)]);
+      p->fd = *fb;
+      *fb = p;
+    }
+
+    /*
+       Consolidate other non-mmapped chunks as they arrive.
+    */
+
+    else if (!chunk_is_mmapped(p)) {
+      set_anychunks(av);
+      /* nextchunk is located before p can move backward; it stays fixed while p and size grow. */
+      nextchunk = chunk_at_offset(p, size);
+      nextsize = chunksize(nextchunk);
+
+      /* consolidate backward: p moves down by prev_size and the previous chunk leaves its list */
+      if (!prev_inuse(p)) {
+        prevsize = p->prev_size;
+        size += prevsize;
+        p = chunk_at_offset(p, -((long) prevsize));
+        unlink(p, bck, fwd);
+      }
+
+      if (nextchunk != av->top) {
+        /* get and clear inuse bit */
+        nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+        set_head(nextchunk, nextsize);
+
+        /* consolidate forward */
+        if (!nextinuse) {
+          unlink(nextchunk, bck, fwd);
+          size += nextsize;
+        }
+
+        /*
+          Place the chunk in unsorted chunk list. Chunks are
+          not placed into regular bins until after they have
+          been given one chance to be used in malloc.
+        */
+        /* Insert p between the list header and the header's current first element. */
+        bck = unsorted_chunks(av);
+        fwd = bck->fd;
+        p->bk = bck;
+        p->fd = fwd;
+        bck->fd = p;
+        fwd->bk = p;
+
+        set_head(p, size | PREV_INUSE);
+        set_foot(p, size);
+
+        check_free_chunk(p);
+      }
+
+      /*
+         If the chunk borders the current high end of memory,
+         consolidate into top
+      */
+
+      else {
+        size += nextsize;
+        set_head(p, size | PREV_INUSE);
+        av->top = p;
+        check_chunk(p);
+      }
+
+      /*
+        If freeing a large space, consolidate possibly-surrounding
+        chunks. Then, if the total unused topmost memory exceeds trim
+        threshold, ask malloc_trim to reduce top.
+
+        Unless max_fast is 0, we don't know if there are fastbins
+        bordering top, so we cannot tell for sure whether threshold
+        has been reached unless fastbins are consolidated.  But we
+        don't want to consolidate on each free.  As a compromise,
+        consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+        is reached.
+      */
+
+      if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { /* size is the coalesced size here */
+        if (have_fastchunks(av)) 
+          malloc_consolidate(av);
+
+#ifndef MORECORE_CANNOT_TRIM        
+        if ((CHUNK_SIZE_T)(chunksize(av->top)) >= 
+            (CHUNK_SIZE_T)(av->trim_threshold))
+          sYSTRIm(av->top_pad, av);
+#endif
+      }
+
+    }
+    /*
+      If the chunk was allocated via mmap, release via munmap()
+      Note that if HAVE_MMAP is false but chunk_is_mmapped is
+      true, then user must have overwritten memory. There's nothing
+      we can do to catch this error unless DEBUG is set, in which case
+      check_inuse_chunk (above) will have triggered error.
+    */
+
+    else {
+#if HAVE_MMAP
+      int ret;
+      INTERNAL_SIZE_T offset = p->prev_size; /* distance from the address passed to munmap up to p */
+      av->n_mmaps--;
+      av->mmapped_mem -= (size + offset);
+      ret = munmap((char*)p - offset, size + offset);
+      /* munmap returns non-zero on failure */
+      assert(ret == 0);
+#endif
+    }
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+  
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+/* av: allocator state to consolidate, or to initialize when av->max_fast is still 0. No return value. */
+#if __STD_C
+static void malloc_consolidate(mstate av)
+#else
+static void malloc_consolidate(av) mstate av;
+#endif
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (av->max_fast != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+    /* The outer do/while tests fb before incrementing it, so bin maxfb itself is processed too. */
+    maxfb = &(av->fastbins[fastbin_index(av->max_fast)]);
+    fb = &(av->fastbins[0]);
+    do {
+      if ( (p = *fb) != 0) {
+        *fb = 0;
+        /* The bin is emptied first; nextp is saved each pass because p's links are rewritten below. */
+        do {
+          check_inuse_chunk(p);
+          nextp = p->fd;
+
+          /* Slightly streamlined version of consolidation code in free() */
+          size = p->size & ~PREV_INUSE; /* masks only the PREV_INUSE flag out of the raw header */
+          nextchunk = chunk_at_offset(p, size);
+          nextsize = chunksize(nextchunk);
+
+          if (!prev_inuse(p)) {
+            prevsize = p->prev_size;
+            size += prevsize;
+            p = chunk_at_offset(p, -((long) prevsize));
+            unlink(p, bck, fwd);
+          }
+
+          if (nextchunk != av->top) {
+            nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+            set_head(nextchunk, nextsize);
+
+            if (!nextinuse) {
+              size += nextsize;
+              unlink(nextchunk, bck, fwd);
+            }
+            /* Splice p in directly after the unsorted-list header. */
+            first_unsorted = unsorted_bin->fd;
+            unsorted_bin->fd = p;
+            first_unsorted->bk = p;
+
+            set_head(p, size | PREV_INUSE);
+            p->bk = unsorted_bin;
+            p->fd = first_unsorted;
+            set_foot(p, size);
+          }
+          /* nextchunk is top: fold its size into p and make p the new top. */
+          else {
+            size += nextsize;
+            set_head(p, size | PREV_INUSE);
+            av->top = p;
+          }
+
+        } while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else { /* max_fast == 0: state not yet set up, so initialize instead of consolidating */
+    malloc_init_state(av);
+    check_malloc_state();
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+/* rEALLOc(oldmem, bytes): resize the block at oldmem to hold bytes bytes; returns the (possibly moved) block, or 0. */
+/* When mALLOc fails on the copy paths below, 0 is returned and oldmem is not freed. */
+#if __STD_C
+Void_t* rEALLOc(Void_t* oldmem, size_t bytes)
+#else
+Void_t* rEALLOc(oldmem, bytes) Void_t* oldmem; size_t bytes;
+#endif
+{
+  mstate av = get_malloc_state();
+
+  INTERNAL_SIZE_T  nb;              /* padded request size */
+
+  mchunkptr        oldp;            /* chunk corresponding to oldmem */
+  INTERNAL_SIZE_T  oldsize;         /* its size */
+
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  Void_t*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  CHUNK_SIZE_T     remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  CHUNK_SIZE_T     copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */ 
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+
+#ifdef REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0) {
+    fREe(oldmem);
+    return 0;
+  }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0) return mALLOc(bytes);
+  /* checked_request2size is defined elsewhere; presumably it pads bytes into nb and returns early on oversized requests -- TODO confirm. */
+  checked_request2size(bytes, nb);
+
+  oldp    = mem2chunk(oldmem);
+  oldsize = chunksize(oldp);
+
+  check_inuse_chunk(oldp);
+
+  if (!chunk_is_mmapped(oldp)) {
+    /* In-heap chunk: reuse in place, grow into top, grow into a free successor, else allocate-copy-free. */
+    if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+    else {
+      next = chunk_at_offset(oldp, oldsize);
+
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >=
+          (CHUNK_SIZE_T)(nb + MINSIZE)) {
+        set_head_size(oldp, nb);
+        av->top = chunk_at_offset(oldp, nb);
+        set_head(av->top, (newsize - nb) | PREV_INUSE);
+        return chunk2mem(oldp);
+      }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top && 
+               !inuse(next) &&
+               (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >=
+               (CHUNK_SIZE_T)(nb)) {
+        newp = oldp;
+        unlink(next, bck, fwd);
+      }
+
+      /* allocate, copy, free */
+      else {
+        newmem = mALLOc(nb - MALLOC_ALIGN_MASK); /* nb is already padded; presumably the subtraction avoids padding it up again -- TODO confirm */
+        if (newmem == 0)
+          return 0; /* propagate failure */
+
+        newp = mem2chunk(newmem);
+        newsize = chunksize(newp);
+
+        /*
+          Avoid copy if newp is next chunk after oldp: both are then treated as one chunk starting at oldp.
+        */
+        if (newp == next) {
+          newsize += oldsize;
+          newp = oldp;
+        }
+        else {
+          /*
+            Unroll copy of <= 36 bytes (72 if 8byte sizes)
+            We know that contents have an odd number of
+            INTERNAL_SIZE_T-sized words; minimally 3.
+          */
+
+          copysize = oldsize - SIZE_SZ;
+          s = (INTERNAL_SIZE_T*)(oldmem);
+          d = (INTERNAL_SIZE_T*)(newmem);
+          ncopies = copysize / sizeof(INTERNAL_SIZE_T);
+          assert(ncopies >= 3);
+
+          if (ncopies > 9)
+            MALLOC_COPY(d, s, copysize);
+
+          else {
+            *(d+0) = *(s+0);
+            *(d+1) = *(s+1);
+            *(d+2) = *(s+2);
+            if (ncopies > 4) {
+              *(d+3) = *(s+3);
+              *(d+4) = *(s+4);
+              if (ncopies > 6) {
+                *(d+5) = *(s+5);
+                *(d+6) = *(s+6);
+                if (ncopies > 8) {
+                  *(d+7) = *(s+7);
+                  *(d+8) = *(s+8);
+                }
+              }
+            }
+          }
+          /* Contents are copied; only now is the old block released. */
+          fREe(oldmem);
+          check_inuse_chunk(newp);
+          return chunk2mem(newp);
+        }
+      }
+    }
+
+    /* If possible, free extra space in old or extended chunk */
+
+    assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb));
+
+    remainder_size = newsize - nb;
+
+    if (remainder_size < MINSIZE) { /* not enough extra to split off */
+      set_head_size(newp, newsize);
+      set_inuse_bit_at_offset(newp, newsize);
+    }
+    else { /* split remainder */
+      remainder = chunk_at_offset(newp, nb);
+      set_head_size(newp, nb);
+      set_head(remainder, remainder_size | PREV_INUSE);
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset(remainder, remainder_size);
+      fREe(chunk2mem(remainder)); 
+    }
+
+    check_inuse_chunk(newp);
+    return chunk2mem(newp);
+  }
+
+  /*
+    Handle mmap cases
+  */
+
+  else {
+#if HAVE_MMAP
+
+#if HAVE_MREMAP
+    INTERNAL_SIZE_T offset = oldp->prev_size;
+    size_t pagemask = av->pagesize - 1;
+    char *cp;
+    CHUNK_SIZE_T  sum;
+
+    /* Note the extra SIZE_SZ overhead */
+    newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask;
+
+    /* don't need to remap if still within same page */
+    if (oldsize == newsize - offset) 
+      return oldmem;
+
+    cp = (char*)mremap((char*)oldp - offset, oldsize + offset, newsize, 1); /* flags value 1: presumably MREMAP_MAYMOVE -- confirm against the platform header */
+
+    if (cp != (char*)MORECORE_FAILURE) {
+
+      newp = (mchunkptr)(cp + offset);
+      set_head(newp, (newsize - offset)|IS_MMAPPED);
+
+      assert(aligned_OK(chunk2mem(newp)));
+      assert((newp->prev_size == offset));
+
+      /* update statistics */
+      sum = av->mmapped_mem += newsize - oldsize;
+      if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) 
+        av->max_mmapped_mem = sum;
+      sum += av->sbrked_mem;
+      if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) 
+        av->max_total_mem = sum;
+
+      return chunk2mem(newp);
+    }
+#endif
+    /* Reached when HAVE_MREMAP is off or mremap reported failure. */
+    /* Note the extra SIZE_SZ overhead. */
+    if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) 
+      newmem = oldmem; /* do nothing */
+    else {
+      /* Must alloc, copy, free. */
+      newmem = mALLOc(nb - MALLOC_ALIGN_MASK);
+      if (newmem != 0) {
+        MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ);
+        fREe(oldmem);
+      }
+    }
+    return newmem;
+
+#else 
+    /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */
+    check_malloc_state();
+    MALLOC_FAILURE_ACTION;
+    return 0;
+#endif
+  }
+}
+
+/*
+  ------------------------------ memalign ------------------------------
+*/
+/* mEMALIGn(alignment, bytes): block of >= bytes bytes at an address that is a multiple of alignment; 0 on failure. */
+#if __STD_C
+Void_t* mEMALIGn(size_t alignment, size_t bytes)
+#else
+Void_t* mEMALIGn(alignment, bytes) size_t alignment; size_t bytes;
+#endif
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char*           m;              /* memory returned by malloc call */
+  mchunkptr       p;              /* corresponding chunk */
+  char*           brk;            /* alignment point within p */
+  mchunkptr       newp;           /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr       remainder;      /* spare room at end to split off */
+  CHUNK_SIZE_T    remainder_size; /* its size */
+  INTERNAL_SIZE_T size;
+
+  /* If need less alignment than we give anyway, just relay to malloc */
+  if (alignment <= MALLOC_ALIGNMENT) return mALLOc(bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment <  MINSIZE) alignment = MINSIZE;
+
+  /* Above the top power of two in size_t the doubling loop below would shift to 0 and spin forever. */
+  if (alignment > ((~(size_t)0) >> 1) + 1) { MALLOC_FAILURE_ACTION; return 0; }
+
+  /* Make sure alignment is power of 2 (in case MINSIZE is not).  */
+  if ((alignment & (alignment - 1)) != 0) {
+    size_t a = MALLOC_ALIGNMENT * 2;
+    while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) a <<= 1;
+    alignment = a;
+  }
+
+  checked_request2size(bytes, nb);
+
+  /* A wrapped padded size would make malloc hand back less room than the splitting code below assumes. */
+  if ((size_t)(nb) > ~(size_t)0 - alignment - MINSIZE) { MALLOC_FAILURE_ACTION; return 0; }
+
+  /*
+    Strategy: find a spot within that chunk that meets the alignment
+    request, and then possibly free the leading and trailing space.
+    Call malloc with worst case padding to hit alignment.
+  */
+  m  = (char*)(mALLOc(nb + alignment + MINSIZE));
+  if (m == 0) return 0; /* propagate failure */
+
+  p = mem2chunk(m);
+
+  if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */
+
+    /*
+      Find an aligned spot inside chunk.  Since we need to give back
+      leading space in a chunk of at least MINSIZE, if the first
+      calculation places us at a spot with less than MINSIZE leader,
+      we can move to the next aligned spot -- we've allocated enough
+      total room so that this is always possible.
+    */
+
+    brk = (char*)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) &
+                           -((signed long) alignment)));
+    if ((CHUNK_SIZE_T)(brk - (char*)(p)) < MINSIZE)
+      brk += alignment;
+
+    newp = (mchunkptr)brk;
+    leadsize = brk - (char*)(p);
+    newsize = chunksize(p) - leadsize;
+
+    /* For mmapped chunks, just adjust offset */
+    if (chunk_is_mmapped(p)) {
+      newp->prev_size = p->prev_size + leadsize;
+      set_head(newp, newsize|IS_MMAPPED);
+      return chunk2mem(newp);
+    }
+
+    /* Otherwise, give back leader, use the rest */
+    set_head(newp, newsize | PREV_INUSE);
+    set_inuse_bit_at_offset(newp, newsize);
+    set_head_size(p, leadsize);
+    fREe(chunk2mem(p));
+    p = newp;
+
+    assert (newsize >= nb &&
+            (((PTR_UINT)(chunk2mem(p))) % alignment) == 0);
+  }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped(p)) {
+    size = chunksize(p);
+    if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset(p, nb);
+      set_head(remainder, remainder_size | PREV_INUSE);
+      set_head_size(p, nb);
+      fREe(chunk2mem(remainder));
+    }
+  }
+
+  check_inuse_chunk(p);
+  return chunk2mem(p);
+}
+
+/*
+  ------------------------------ calloc ------------------------------
+*/
+/* cALLOc(n_elements, elem_size): zero-filled block for n_elements items of elem_size bytes; 0 on overflow or malloc failure. */
+#if __STD_C
+Void_t* cALLOc(size_t n_elements, size_t elem_size)
+#else
+Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size;
+#endif
+{
+  mchunkptr p;
+  CHUNK_SIZE_T  clearsize;
+  CHUNK_SIZE_T  nclears;
+  INTERNAL_SIZE_T* d;
+  Void_t* mem;
+  /* n_elements * elem_size must not wrap, or a block smaller than requested would be returned. */
+  if (elem_size != 0 && n_elements > ~(size_t)0 / elem_size) { MALLOC_FAILURE_ACTION; return 0; }
+  mem = mALLOc(n_elements * elem_size);
+  if (mem != 0) {
+    p = mem2chunk(mem);
+    if (!chunk_is_mmapped(p))
+    {  
+      /*
+        Unroll clear of <= 36 bytes (72 if 8byte sizes)
+        We know that contents have an odd number of
+        INTERNAL_SIZE_T-sized words; minimally 3.
+      */
+
+      d = (INTERNAL_SIZE_T*)mem;
+      clearsize = chunksize(p) - SIZE_SZ;
+      nclears = clearsize / sizeof(INTERNAL_SIZE_T);
+      assert(nclears >= 3);
+
+      if (nclears > 9)
+        MALLOC_ZERO(d, clearsize);
+
+      else {
+        *(d+0) = 0;
+        *(d+1) = 0;
+        *(d+2) = 0;
+        if (nclears > 4) {
+          *(d+3) = 0;
+          *(d+4) = 0;
+          if (nclears > 6) {
+            *(d+5) = 0;
+            *(d+6) = 0;
+            if (nclears > 8) {
+              *(d+7) = 0;
+              *(d+8) = 0;
+            }
+          }
+        }
+      }
+    }
+#if ! MMAP_CLEARS
+    else
+    {
+      d = (INTERNAL_SIZE_T*)mem;
+      /*
+        Note the additional SIZE_SZ
+      */
+      clearsize = chunksize(p) - 2*SIZE_SZ;
+      MALLOC_ZERO(d, clearsize);
+    }
+#endif
+  }
+  return mem;
+}
+
+/*
+  ------------------------------ cfree ------------------------------
+*/
+/* cFREe(mem): forwards mem to fREe() unchanged. */
+#if __STD_C
+void cFREe(Void_t *mem)
+#else
+void cFREe(mem) Void_t *mem;
+#endif
+{
+  fREe(mem);
+}
+
+/*
+  ------------------------- independent_calloc -------------------------
+*/
+/* iCALLOc: n_elements zeroed chunks of elem_size bytes each, obtained through iALLOc; chunks is passed straight through. */
+#if __STD_C
+Void_t** iCALLOc(size_t n_elements, size_t elem_size, Void_t* chunks[])
+#else
+Void_t** iCALLOc(n_elements, elem_size, chunks) size_t n_elements; size_t elem_size; Void_t* chunks[];
+#endif
+{
+  size_t uniform_size[1]; /* the one size shared by every element */
+  uniform_size[0] = elem_size;
+  return iALLOc(n_elements, uniform_size, 0x1 | 0x2, chunks); /* opts: all same size + zeroed */
+}
+
+/*
+  ------------------------- independent_comalloc -------------------------
+*/
+/* iCOMALLOc: forwards to iALLOc with opts 0, i.e. per-element sizes taken from sizes[] and no zeroing. */
+#if __STD_C
+Void_t** iCOMALLOc(size_t n_elements, size_t sizes[], Void_t* chunks[])
+#else
+Void_t** iCOMALLOc(n_elements, sizes, chunks) size_t n_elements; size_t sizes[]; Void_t* chunks[];
+#endif
+{
+  return iALLOc(n_elements, sizes, 0, chunks);
+}
+
+
+/*
+  ------------------------------ ialloc ------------------------------
+  ialloc provides common support for independent_X routines, handling all of
+  the combinations that can result.
+
+  The opts arg has:
+    bit 0 set if all elements are same size (using sizes[0])
+    bit 1 set if elements should be zeroed
+*/
+/* Returns chunks when the caller supplied it; otherwise a pointer array carved from the tail of the same allocation. */
+/* Returns 0 if mALLOc fails. NOTE(review): the size products and sums below carry no overflow check -- confirm callers bound them. */
+#if __STD_C
+static Void_t** iALLOc(size_t n_elements, 
+                       size_t* sizes,  
+                       int opts,
+                       Void_t* chunks[])
+#else
+static Void_t** iALLOc(n_elements, sizes, opts, chunks) size_t n_elements; size_t* sizes; int opts; Void_t* chunks[];
+#endif
+{
+  mstate av = get_malloc_state();
+  INTERNAL_SIZE_T element_size;   /* chunksize of each element, if all same */
+  INTERNAL_SIZE_T contents_size;  /* total size of elements */
+  INTERNAL_SIZE_T array_size;     /* request size of pointer array */
+  Void_t*         mem;            /* malloced aggregate space */
+  mchunkptr       p;              /* corresponding chunk */
+  INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */
+  Void_t**        marray;         /* either "chunks" or malloced ptr array */
+  mchunkptr       array_chunk;    /* chunk for malloced ptr array */
+  int             mmx;            /* to disable mmap */
+  INTERNAL_SIZE_T size;           
+  size_t          i;
+
+  /* Ensure initialization */
+  if (av->max_fast == 0) malloc_consolidate(av);
+
+  /* compute array length, if needed */
+  if (chunks != 0) {
+    if (n_elements == 0)
+      return chunks; /* nothing to do */
+    marray = chunks;
+    array_size = 0;
+  }
+  else {
+    /* if empty req, must still return chunk representing empty array */
+    if (n_elements == 0) 
+      return (Void_t**) mALLOc(0);
+    marray = 0;
+    array_size = request2size(n_elements * (sizeof(Void_t*)));
+  }
+
+  /* compute total element size */
+  if (opts & 0x1) { /* all-same-size */
+    element_size = request2size(*sizes);
+    contents_size = n_elements * element_size;
+  }
+  else { /* add up all the sizes */
+    element_size = 0; /* 0 doubles as the "sizes differ" marker tested in the split loop below */
+    contents_size = 0;
+    for (i = 0; i != n_elements; ++i) 
+      contents_size += request2size(sizes[i]);     
+  }
+
+  /* subtract out alignment bytes from total to minimize overallocation */
+  size = contents_size + array_size - MALLOC_ALIGN_MASK;
+
+  /* 
+     Allocate the aggregate chunk.
+     But first disable mmap so malloc won't use it, since
+     we would not be able to later free/realloc space internal
+     to a segregated mmap region.
+ */
+  mmx = av->n_mmaps_max;   /* disable mmap */
+  av->n_mmaps_max = 0;
+  mem = mALLOc(size);
+  av->n_mmaps_max = mmx;   /* reset mmap */
+  if (mem == 0) 
+    return 0;
+  /* remainder_size tracks how much of the aggregate chunk is still unassigned. */
+  p = mem2chunk(mem);
+  assert(!chunk_is_mmapped(p)); 
+  remainder_size = chunksize(p);
+
+  if (opts & 0x2) {       /* optionally clear the elements */
+    MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size);
+  }
+
+  /* If not provided, allocate the pointer array as final part of chunk */
+  if (marray == 0) {
+    array_chunk = chunk_at_offset(p, contents_size);
+    marray = (Void_t**) (chunk2mem(array_chunk));
+    set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE);
+    remainder_size = contents_size;
+  }
+  /* Walk the aggregate chunk, writing one header per element; the loop leaves via break on the last one. */
+  /* split out elements */
+  for (i = 0; ; ++i) {
+    marray[i] = chunk2mem(p);
+    if (i != n_elements-1) {
+      if (element_size != 0) 
+        size = element_size;
+      else
+        size = request2size(sizes[i]);          
+      remainder_size -= size;
+      set_head(p, size | PREV_INUSE);
+      p = chunk_at_offset(p, size);
+    }
+    else { /* the final element absorbs any overallocation slop */
+      set_head(p, remainder_size | PREV_INUSE);
+      break;
+    }
+  }
+
+#if DEBUG
+  if (marray != chunks) {
+    /* final element must have exactly exhausted chunk */
+    if (element_size != 0) 
+      assert(remainder_size == element_size);
+    else
+      assert(remainder_size == request2size(sizes[i]));
+    check_inuse_chunk(mem2chunk(marray));
+  }
+
+  for (i = 0; i != n_elements; ++i)
+    check_inuse_chunk(mem2chunk(marray[i]));
+#endif
+
+  return marray;
+}
+
+
+/*
+  ------------------------------ valloc ------------------------------
+*/
+/* vALLOc(bytes): relays to mEMALIGn with the state's pagesize as the alignment. */
+#if __STD_C
+Void_t* vALLOc(size_t bytes)
+#else
+Void_t* vALLOc(bytes) size_t bytes;
+#endif
+{
+  mstate state = get_malloc_state();
+
+  if (state->max_fast == 0) malloc_consolidate(state); /* first use: make sure the state is set up */
+  return mEMALIGn(state->pagesize, bytes);
+}
+
+/*
+  ------------------------------ pvalloc ------------------------------
+*/
+/* pVALLOc(bytes): page-aligned block whose size is bytes rounded up to a whole number of pages; 0 on failure. */
+#if __STD_C
+Void_t* pVALLOc(size_t bytes)
+#else
+Void_t* pVALLOc(bytes) size_t bytes;
+#endif
+{
+  mstate av = get_malloc_state();
+  size_t pagesz;
+  /* Ensure initialization */
+  if (av->max_fast == 0) malloc_consolidate(av);
+  pagesz = av->pagesize;
+  /* Rounding up must not wrap: a wrapped size would silently satisfy a huge request with a tiny block. */
+  if (bytes > ~(size_t)0 - (pagesz - 1)) { MALLOC_FAILURE_ACTION; return 0; }
+  return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1));
+}
+   
+
+/*
+  ------------------------------ malloc_trim ------------------------------
+*/
+/* mTRIm(pad): consolidate, then return sYSTRIm(pad, state); always 0 when MORECORE_CANNOT_TRIM is defined. */
+#if __STD_C
+int mTRIm(size_t pad)
+#else
+int mTRIm(pad) size_t pad;
+#endif
+{
+  mstate state = get_malloc_state();
+  /* Runs unconditionally: this both initializes the state and consolidates */
+  malloc_consolidate(state);
+
+#ifdef MORECORE_CANNOT_TRIM
+  return 0;
+#else
+  return sYSTRIm(pad, state);
+#endif
+}
+
+
+/*
+  ------------------------- malloc_usable_size -------------------------
+*/
+/* mUSABLe(mem): usable byte count of the chunk behind mem; 0 for a null pointer or a chunk not marked in use. */
+#if __STD_C
+size_t mUSABLe(Void_t* mem)
+#else
+size_t mUSABLe(mem) Void_t* mem;
+#endif
+{
+  mchunkptr owner;
+  if (mem == 0)
+    return 0;
+  owner = mem2chunk(mem);
+  if (chunk_is_mmapped(owner))
+    return chunksize(owner) - 2*SIZE_SZ;
+  if (!inuse(owner))
+    return 0;
+  return chunksize(owner) - SIZE_SZ;
+}
+
+/*
+  ------------------------------ mallinfo ------------------------------
+*/
+/* mALLINFo(): statistics snapshot gathered by walking top, the fastbins and the regular bins. */
+struct mallinfo mALLINFo()
+{
+  mstate av = get_malloc_state();
+  struct mallinfo report;
+  int slot;
+  mbinptr head;
+  mchunkptr cur;
+  INTERNAL_SIZE_T free_bytes;      /* top + fastbins + regular bins */
+  INTERNAL_SIZE_T fast_bytes = 0;  /* fastbin share of free_bytes */
+  int free_chunks = 1;             /* top always exists */
+  int fast_chunks = 0;
+
+  /* Ensure initialization */
+  if (av->top == 0)  malloc_consolidate(av);
+
+  check_malloc_state();
+
+  /* Start the running total with top */
+  free_bytes = chunksize(av->top);
+
+  /* Fastbins: followed through fd until a null link */
+  for (slot = 0; slot < NFASTBINS; ++slot) {
+    cur = av->fastbins[slot];
+    while (cur != 0) {
+      fast_chunks += 1;
+      fast_bytes += chunksize(cur);
+      cur = cur->fd;
+    }
+  }
+
+  free_bytes += fast_bytes;
+
+  /* Regular bins: walked through bk from last(head) until the header comes round again */
+  for (slot = 1; slot < NBINS; ++slot) {
+    head = bin_at(av, slot);
+    cur = last(head);
+    while (cur != head) {
+      free_chunks += 1;
+      free_bytes += chunksize(cur);
+      cur = cur->bk;
+    }
+  }
+
+  report.arena = av->sbrked_mem;
+  report.ordblks = free_chunks;
+  report.smblks = fast_chunks;
+  report.hblks = av->n_mmaps;
+  report.hblkhd = av->mmapped_mem;
+  report.usmblks = av->max_total_mem;
+  report.fsmblks = fast_bytes;
+  report.uordblks = av->sbrked_mem - free_bytes;
+  report.fordblks = free_bytes;
+  report.keepcost = chunksize(av->top);
+  return report;
+}
+
+/*
+  ------------------------------ malloc_stats ------------------------------
+  Prints a short usage report built from mALLINFo(). WIN32 builds also
+  print virtual-memory and CPU-time figures.
+*/
+void mSTATs()
+{
+  struct mallinfo snapshot = mALLINFo();
+  /* Figures for the portable part of the report */
+  CHUNK_SIZE_T peak_system = (CHUNK_SIZE_T)(snapshot.usmblks);
+  CHUNK_SIZE_T now_system  = (CHUNK_SIZE_T)(snapshot.arena + snapshot.hblkhd);
+  CHUNK_SIZE_T now_in_use  = (CHUNK_SIZE_T)(snapshot.uordblks + snapshot.hblkhd);
+
+  /* Windows only: virtual-memory figures filled in by vminfo() */
+#ifdef WIN32
+  {
+    CHUNK_SIZE_T  vm_free, vm_reserved, vm_committed;
+    vminfo (&vm_free, &vm_reserved, &vm_committed);
+    fprintf(stderr, "free bytes       = %10lu\n", vm_free);
+    fprintf(stderr, "reserved bytes   = %10lu\n", vm_reserved);
+    fprintf(stderr, "committed bytes  = %10lu\n", vm_committed);
+  }
+#endif
+
+  /* RN XXX  */
+  /* These three lines go out through printf(); the WIN32-only lines use stderr */
+  printf("max system bytes = %10lu\n", peak_system);
+  printf("system bytes     = %10lu\n", now_system);
+  printf("in use bytes     = %10lu\n", now_in_use);
+
+  /* Windows only: CPU times, printed when cpuinfo() returns nonzero */
+#ifdef WIN32
+  {
+    CHUNK_SIZE_T  kernel_ms, user_ms;
+    if (cpuinfo (TRUE, &kernel_ms, &user_ms)) {
+      fprintf(stderr, "kernel ms        = %10lu\n", kernel_ms);
+      fprintf(stderr, "user ms          = %10lu\n", user_ms);
+    }
+  }
+#endif
+}
+
+
+/*
+  ------------------------------ mallopt ------------------------------
+*/
+/* mALLOPt(param_number, value): set one tuning parameter; returns 1 if applied, 0 if the parameter is unknown or the value rejected. */
+#if __STD_C
+int mALLOPt(int param_number, int value)
+#else
+int mALLOPt(param_number, value) int param_number; int value;
+#endif
+{
+  mstate av = get_malloc_state();
+  int accepted = 1;   /* cleared below for unknown parameters and rejected values */
+  /* Ensure initialization/consolidation */
+  malloc_consolidate(av);
+
+  switch (param_number) {
+  case M_MXFAST:
+    if (value < 0 || value > MAX_FAST_SIZE)
+      accepted = 0;
+    else {
+      set_max_fast(av, value);
+    }
+    break;
+  case M_TRIM_THRESHOLD:
+    av->trim_threshold = value;
+    break;
+  case M_TOP_PAD:
+    av->top_pad = value;
+    break;
+  case M_MMAP_THRESHOLD:
+    av->mmap_threshold = value;
+    break;
+  case M_MMAP_MAX:
+#if !HAVE_MMAP
+    if (value != 0) {   /* without mmap only a limit of 0 is accepted */
+      accepted = 0;
+      break;
+    }
+#endif
+    av->n_mmaps_max = value;
+    break;
+  default:
+    accepted = 0;
+    break;
+  }
+  return accepted;
+}
+
+
+/* 
+  -------------------- Alternative MORECORE functions --------------------
+*/
+
+
+/*
+  General Requirements for MORECORE.
+
+  The MORECORE function must have the following properties:
+
+  If MORECORE_CONTIGUOUS is false:
+
+    * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+    * MORECORE(0) must return an address that is at least 
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+  else (i.e. If MORECORE_CONTIGUOUS is true):
+
+    * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended. 
+
+    * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+    * MORECORE need not page-align.
+
+  In either case:
+
+    * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+    * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+    * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+    * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM.
+
+  There is some variation across systems about the type of the
+  argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+  actually be size_t, because sbrk supports negative args, so it is
+  normally the signed type of the same width as size_t (sometimes
+  declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+  matter though. Internally, we use "long" as arguments, which should
+  work across all reasonable possibilities.
+
+  Additionally, if MORECORE ever returns failure for a positive
+  request, and HAVE_MMAP is true, then mmap is used as a noncontiguous
+  system allocator. This is a useful backup strategy for systems with
+  holes in address spaces -- in this case sbrk cannot contiguously
+  expand the heap, but mmap may be able to map noncontiguous space.
+
+  If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+  a function that always returns MORECORE_FAILURE.
+
+  Malloc only has limited ability to detect failures of MORECORE
+  to supply contiguous space when it says it can. In particular,
+  multithreaded programs that do not use locks may result in
+  race conditions across calls to MORECORE that result in gaps
+  that cannot be detected as such, and subsequent corruption.
+
+  If you are using this malloc with something other than sbrk (or its
+  emulation) to supply memory regions, you probably want to set
+  MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+  allocator kindly contributed for pre-OSX macOS.  It uses virtually
+  but not necessarily physically contiguous non-paged memory (locked
+  in, present and won't get swapped out).  You can use it by
+  uncommenting this section, adding some #includes, and setting up the
+  appropriate defines above:
+
+      #define MORECORE osMoreCore
+      #define MORECORE_CONTIGUOUS 0
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
+
+  #define MAX_POOL_ENTRIES 100
+  #define MINIMUM_MORECORE_SIZE  (64 * 1024)
+  static int next_os_pool;
+  void *our_os_pools[MAX_POOL_ENTRIES];
+
+  void *osMoreCore(int size)
+  {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+  }
+
+  // cleanup any allocated memory pools
+  // called as last thing before shutting down driver
+
+  void osCleanupMem(void)
+  {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+  }
+
+*/
+
+
+/* 
+  -------------------------------------------------------------- 
+
+  Emulation of sbrk for win32. 
+  Donated by J. Walter <Walter@GeNeSys-e.de>.
+  For additional information about this code, and malloc on Win32, see 
+     http://www.genesys-e.de/jwalter/
+*/
+
+
+#ifdef WIN32
+
+#ifdef _DEBUG
+/* #define TRACE */
+#endif
+
+/* Support for USE_MALLOC_LOCK */
+#ifdef USE_MALLOC_LOCK
+
+/* Wait for spin lock: atomically swap *sl from 0 to 1, retrying until the swap succeeds */
+static int slwait (int *sl) {
+    while (InterlockedCompareExchange ((void **) sl, (void *) 1, (void *) 0))
+        Sleep (0);    /* previous value was non-zero, i.e. the lock is taken: try again */
+    return 0;
+}
+
+/* Release spin lock: atomically store 0 into *sl; always returns 0 */
+static int slrelease (int *sl) {
+    InterlockedExchange (sl, 0);
+    return 0;
+}
+
+#ifdef NEEDED
+/* Spin lock for emulation code */
+static int g_sl;
+#endif
+
+#endif /* USE_MALLOC_LOCK */
+
+/* getpagesize for windows: the system page size, fetched once and then cached */
+static long getpagesize (void) {
+    static long cached_pagesize = 0;
+    SYSTEM_INFO si;
+    if (cached_pagesize)
+        return cached_pagesize;
+    GetSystemInfo (&si);
+    cached_pagesize = si.dwPageSize;
+    return cached_pagesize;
+}
+/* region size for windows: the allocation granularity, fetched once and then cached */
+static long getregionsize (void) {
+    static long cached_regionsize = 0;
+    SYSTEM_INFO si;
+    if (cached_regionsize)
+        return cached_regionsize;
+    GetSystemInfo (&si);
+    return cached_regionsize = si.dwAllocationGranularity;
+}
+
+/* A region list entry: bookkeeping for one reserved address range used by the sbrk emulation */
+typedef struct _region_list_entry {
+    void *top_allocated;                  /* end of the bytes handed out so far */
+    void *top_committed;                  /* end of the committed pages */
+    void *top_reserved;                   /* end of the reserved range */
+    long reserve_size;                    /* size of the reserved range in bytes */
+    struct _region_list_entry *previous;  /* entry that was last before this one, or null */
+} region_list_entry;
+
+/* Allocate an entry for the reservation at base_reserved and make it the new *last; FALSE if out of memory */
+static int region_list_append (region_list_entry **last, void *base_reserved, long reserve_size) {
+    char *base = (char *) base_reserved;
+    region_list_entry *entry = HeapAlloc (GetProcessHeap (), 0, sizeof (region_list_entry));
+    if (entry) {
+        entry->top_allocated = entry->top_committed = base;  /* nothing handed out or committed yet */
+        entry->top_reserved = base + reserve_size;
+        entry->reserve_size = reserve_size;
+        entry->previous = *last;
+        *last = entry;
+    }
+    return entry ? TRUE : FALSE;
+}
+/* Free and unlink the last region entry; returns FALSE and leaves the list as is if HeapFree fails */
+static int region_list_remove (region_list_entry **last) {
+    region_list_entry *previous = (*last)->previous;
+    if (! HeapFree (GetProcessHeap (), 0, *last))  /* 2nd argument is dwFlags, not a byte count */
+        return FALSE;
+    *last = previous;
+    return TRUE;
+}
+
+#define CEIL(size,to)	(((size)+(to)-1)&~((to)-1))  /* round size up to a multiple of to (to must be a power of two) */
+#define FLOOR(size,to)	((size)&~((to)-1))  /* round size down to a multiple of to (to must be a power of two) */
+/* SBRK_SCALE: left shift applied to the page and region sizes to get sbrk's rounding units */
+#define SBRK_SCALE  0
+/* #define SBRK_SCALE  1 */
+/* #define SBRK_SCALE  2 */
+/* #define SBRK_SCALE  4  */
+
+/* sbrk for windows, emulated with VirtualAlloc/VirtualFree over a list of reserved regions.
+   size >= 0 hands out that many more bytes (committing pages, and reserving a new region when
+   the current one is exhausted) and returns their start; size < 0 gives back -size bytes and
+   returns the new top.  Any failure returns MORECORE_FAILURE. */
+static void *sbrk (long size) {
+    static long g_pagesize, g_my_pagesize;
+    static long g_regionsize, g_my_regionsize;
+    static region_list_entry *g_last;
+    void *result = (void *) MORECORE_FAILURE;
+#ifdef TRACE
+    printf ("sbrk %ld\n", size);
+#endif
+#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
+    /* Wait for spin lock */
+    slwait (&g_sl);
+#endif
+    /* First time initialization */
+    if (! g_pagesize) {
+        g_pagesize = getpagesize ();
+        g_my_pagesize = g_pagesize << SBRK_SCALE;
+    }
+    if (! g_regionsize) {
+        g_regionsize = getregionsize ();
+        g_my_regionsize = g_regionsize << SBRK_SCALE;
+    }
+    if (! g_last) {
+        if (! region_list_append (&g_last, 0, 0))
+           goto sbrk_exit;
+    }
+    /* Assert invariants */
+    assert (g_last);
+    assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_allocated &&
+            g_last->top_allocated <= g_last->top_committed);
+    assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_committed &&
+            g_last->top_committed <= g_last->top_reserved &&
+            (unsigned) g_last->top_committed % g_pagesize == 0);
+    assert ((unsigned) g_last->top_reserved % g_regionsize == 0);
+    assert ((unsigned) g_last->reserve_size % g_regionsize == 0);
+    /* Allocation requested? */
+    if (size >= 0) {
+        /* Allocation size is the requested size */
+        long allocate_size = size;
+        /* Compute the size to commit */
+        long to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed;
+        /* Do we reach the commit limit? */
+        if (to_commit > 0) {
+            /* Round size to commit */
+            long commit_size = CEIL (to_commit, g_my_pagesize);
+            /* Compute the size to reserve */
+            long to_reserve = (char *) g_last->top_committed + commit_size - (char *) g_last->top_reserved;
+            /* Do we reach the reserve limit? */
+            if (to_reserve > 0) {
+                /* Compute the remaining size to commit in the current region */
+                long remaining_commit_size = (char *) g_last->top_reserved - (char *) g_last->top_committed;
+                if (remaining_commit_size > 0) {
+                    /* Assert preconditions */
+                    assert ((unsigned) g_last->top_committed % g_pagesize == 0);
+                    assert (0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); {
+                        /* Commit this */
+                        void *base_committed = VirtualAlloc (g_last->top_committed, remaining_commit_size, MEM_COMMIT, PAGE_READWRITE);
+                        /* Check returned pointer for consistency */
+                        if (base_committed != g_last->top_committed)
+                            goto sbrk_exit;
+                        /* Assert postconditions */
+                        assert ((unsigned) base_committed % g_pagesize == 0);
+#ifdef TRACE
+                        printf ("Commit %p %ld\n", base_committed, remaining_commit_size);
+#endif
+                        /* Adjust the regions commit top */
+                        g_last->top_committed = (char *) base_committed + remaining_commit_size;
+                    }
+                } {
+                    /* Now we are going to search and reserve. */
+                    int contiguous = -1;
+                    int found = FALSE;
+                    MEMORY_BASIC_INFORMATION memory_info;
+                    void *base_reserved;
+                    long reserve_size;
+                    do {
+                        found = FALSE;      /* a retry must not inherit the previous pass's hit */
+                        contiguous = TRUE;  /* assume contiguous memory until the scan says otherwise */
+                        /* Round size to reserve */
+                        reserve_size = CEIL (to_reserve, g_my_regionsize);
+                        /* Start with the current region's top */
+                        memory_info.BaseAddress = g_last->top_reserved;
+                        /* Assert preconditions */
+                        assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0);
+                        assert (0 < reserve_size && reserve_size % g_regionsize == 0);
+                        while (VirtualQuery (memory_info.BaseAddress, &memory_info, sizeof (memory_info))) {
+                            /* Assert postconditions */
+                            assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0);
+#ifdef TRACE
+                            printf ("Query %p %lu %s\n", memory_info.BaseAddress, (unsigned long) memory_info.RegionSize,
+                                    memory_info.State == MEM_FREE ? "FREE":
+                                    (memory_info.State == MEM_RESERVE ? "RESERVED":
+                                     (memory_info.State == MEM_COMMIT ? "COMMITTED": "?")));
+#endif
+                            /* Region is free, well aligned and big enough: we are done */
+                            if (memory_info.State == MEM_FREE &&
+                                (unsigned) memory_info.BaseAddress % g_regionsize == 0 &&
+                                memory_info.RegionSize >= (unsigned) reserve_size) {
+                                found = TRUE;
+                                break;
+                            }
+                            /* From now on we can't get contiguous memory! */
+                            contiguous = FALSE;
+                            /* Recompute size to reserve */
+                            reserve_size = CEIL (allocate_size, g_my_regionsize);
+                            memory_info.BaseAddress = (char *) memory_info.BaseAddress + memory_info.RegionSize;
+                            /* Assert preconditions */
+                            assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0);
+                            assert (0 < reserve_size && reserve_size % g_regionsize == 0);
+                        }
+                        /* Search failed? */
+                        if (! found)
+                            goto sbrk_exit;
+                        /* Assert preconditions */
+                        assert ((unsigned) memory_info.BaseAddress % g_regionsize == 0);
+                        assert (0 < reserve_size && reserve_size % g_regionsize == 0);
+                        /* Try to reserve this */
+                        base_reserved = VirtualAlloc (memory_info.BaseAddress, reserve_size, MEM_RESERVE, PAGE_NOACCESS);
+                        if (! base_reserved) {
+                            int rc = GetLastError ();
+                            if (rc != ERROR_INVALID_ADDRESS)
+                                goto sbrk_exit;
+                        }
+                        /* A null pointer signals (hopefully) a race condition with another thread. */
+                        /* In this case, we try again. */
+                    } while (! base_reserved);
+                    /* Check returned pointer for consistency */
+                    if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress)
+                        goto sbrk_exit;
+                    /* Assert postconditions */
+                    assert ((unsigned) base_reserved % g_regionsize == 0);
+#ifdef TRACE
+                    printf ("Reserve %p %ld\n", base_reserved, reserve_size);
+#endif
+                    /* Did we get contiguous memory? */
+                    if (contiguous) {
+                        long start_size = (char *) g_last->top_committed - (char *) g_last->top_allocated;
+                        /* Adjust allocation size */
+                        allocate_size -= start_size;
+                        /* Adjust the regions allocation top */
+                        g_last->top_allocated = g_last->top_committed;
+                        /* Recompute the size to commit */
+                        to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed;
+                        /* Round size to commit */
+                        commit_size = CEIL (to_commit, g_my_pagesize);
+                    }
+                    /* Append the new region to the list */
+                    if (! region_list_append (&g_last, base_reserved, reserve_size))
+                        goto sbrk_exit;  /* NOTE(review): base_reserved stays reserved but untracked here */
+                    /* Didn't we get contiguous memory? */
+                    if (! contiguous) {
+                        /* Recompute the size to commit */
+                        to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed;
+                        /* Round size to commit */
+                        commit_size = CEIL (to_commit, g_my_pagesize);
+                    }
+                }
+            }
+            /* Assert preconditions */
+            assert ((unsigned) g_last->top_committed % g_pagesize == 0);
+            assert (0 < commit_size && commit_size % g_pagesize == 0); {
+                /* Commit this */
+                void *base_committed = VirtualAlloc (g_last->top_committed, commit_size, MEM_COMMIT, PAGE_READWRITE);
+                /* Check returned pointer for consistency */
+                if (base_committed != g_last->top_committed)
+                    goto sbrk_exit;
+                /* Assert postconditions */
+                assert ((unsigned) base_committed % g_pagesize == 0);
+#ifdef TRACE
+                printf ("Commit %p %ld\n", base_committed, commit_size);
+#endif
+                /* Adjust the regions commit top */
+                g_last->top_committed = (char *) base_committed + commit_size;
+            }
+        }
+        /* Adjust the regions allocation top */
+        g_last->top_allocated = (char *) g_last->top_allocated + allocate_size;
+        result = (char *) g_last->top_allocated - size;
+    /* Deallocation requested? */
+    } else if (size < 0) {
+        long deallocate_size = - size;
+        /* As long as we have a region to release */
+        while ((char *) g_last->top_allocated - deallocate_size < (char *) g_last->top_reserved - g_last->reserve_size) {
+            /* Get the size to release */
+            long release_size = g_last->reserve_size;
+            /* Get the base address */
+            void *base_reserved = (char *) g_last->top_reserved - release_size;
+            /* Assert preconditions */
+            assert ((unsigned) base_reserved % g_regionsize == 0);
+            assert (0 < release_size && release_size % g_regionsize == 0); {
+                /* Release this */
+                int rc = VirtualFree (base_reserved, 0,
+                                      MEM_RELEASE);
+                /* Check returned code for consistency */
+                if (! rc)
+                    goto sbrk_exit;
+#ifdef TRACE
+                printf ("Release %p %ld\n", base_reserved, release_size);
+#endif
+            }
+            /* Adjust deallocation size */
+            deallocate_size -= (char *) g_last->top_allocated - (char *) base_reserved;
+            /* Remove the old region from the list */
+            if (! region_list_remove (&g_last))
+                goto sbrk_exit;
+        } {
+            /* Compute the size to decommit */
+            long to_decommit = (char *) g_last->top_committed - ((char *) g_last->top_allocated - deallocate_size);
+            if (to_decommit >= g_my_pagesize) {
+                /* Compute the size to decommit */
+                long decommit_size = FLOOR (to_decommit, g_my_pagesize);
+                /*  Compute the base address */
+                void *base_committed = (char *) g_last->top_committed - decommit_size;
+                /* Assert preconditions */
+                assert ((unsigned) base_committed % g_pagesize == 0);
+                assert (0 < decommit_size && decommit_size % g_pagesize == 0); {
+                    /* Decommit this */
+                    int rc = VirtualFree ((char *) base_committed, decommit_size,
+                                          MEM_DECOMMIT);
+                    /* Check returned code for consistency */
+                    if (! rc)
+                        goto sbrk_exit;
+#ifdef TRACE
+                    printf ("Decommit %p %ld\n", base_committed, decommit_size);
+#endif
+                }
+                /* Adjust deallocation size and regions commit and allocate top */
+                deallocate_size -= (char *) g_last->top_allocated - (char *) base_committed;
+                g_last->top_committed = base_committed;
+                g_last->top_allocated = base_committed;
+            }
+        }
+        /* Adjust regions allocate top */
+        g_last->top_allocated = (char *) g_last->top_allocated - deallocate_size;
+        /* Check for underflow */
+        if ((char *) g_last->top_reserved - g_last->reserve_size > (char *) g_last->top_allocated ||
+            g_last->top_allocated > g_last->top_committed) {
+            /* Adjust regions allocate top */
+            g_last->top_allocated = (char *) g_last->top_reserved - g_last->reserve_size;
+            goto sbrk_exit;
+        }
+        result = g_last->top_allocated;
+    }
+    /* Assert invariants */
+    assert (g_last);
+    assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_allocated &&
+            g_last->top_allocated <= g_last->top_committed);
+    assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_committed &&
+            g_last->top_committed <= g_last->top_reserved &&
+            (unsigned) g_last->top_committed % g_pagesize == 0);
+    assert ((unsigned) g_last->top_reserved % g_regionsize == 0);
+    assert ((unsigned) g_last->reserve_size % g_regionsize == 0);
+
+sbrk_exit:
+#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
+    /* Release spin lock */
+    slrelease (&g_sl);
+#endif
+    return result;
+}
+
+/* mmap for windows: reserve and commit size bytes of read/write memory with VirtualAlloc.
+   prot, type, handle and arg are ignored.  Returns the base address or MORECORE_FAILURE. */
+static void *mmap (void *ptr, long size, long prot, long type, long handle, long arg) {
+    static long g_pagesize;
+    static long g_regionsize;
+#ifdef TRACE
+    printf ("mmap %ld\n", size);
+#endif
+#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
+    /* Wait for spin lock */
+    slwait (&g_sl);
+#endif
+    /* First time initialization */
+    if (! g_pagesize)
+        g_pagesize = getpagesize ();
+    if (! g_regionsize)
+        g_regionsize = getregionsize ();
+    /* Assert preconditions */
+    assert ((unsigned) ptr % g_regionsize == 0);
+    assert (size % g_pagesize == 0);
+    /* Allocate this */
+    ptr = VirtualAlloc (ptr, size, MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE);
+    if (! ptr) {
+        ptr = (void *) MORECORE_FAILURE;
+        goto mmap_exit;
+    }
+    /* Assert postconditions */
+    assert ((unsigned) ptr % g_regionsize == 0);
+#ifdef TRACE
+    printf ("Commit %p %ld\n", ptr, size);
+#endif
+mmap_exit:
+#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
+    /* Release spin lock */
+    slrelease (&g_sl);
+#endif
+    return ptr;
+}
+
+/* munmap for windows: release the whole region that starts at ptr with VirtualFree (MEM_RELEASE).
+   size is only asserted and traced, not passed on.  Returns 0 on success, else MUNMAP_FAILURE. */
+static long munmap (void *ptr, long size) {
+    static long g_pagesize;
+    static long g_regionsize;
+    int rc = MUNMAP_FAILURE;
+#ifdef TRACE
+    printf ("munmap %p %ld\n", ptr, size);
+#endif
+#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
+    /* Wait for spin lock */
+    slwait (&g_sl);
+#endif
+    /* First time initialization */
+    if (! g_pagesize)
+        g_pagesize = getpagesize ();
+    if (! g_regionsize)
+        g_regionsize = getregionsize ();
+    /* Assert preconditions */
+    assert ((unsigned) ptr % g_regionsize == 0);
+    assert (size % g_pagesize == 0);
+    /* Free this */
+    if (! VirtualFree (ptr, 0, MEM_RELEASE))
+        goto munmap_exit;
+    rc = 0;
+#ifdef TRACE
+    printf ("Release %p %ld\n", ptr, size);
+#endif
+munmap_exit:
+#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
+    /* Release spin lock */
+    slrelease (&g_sl);
+#endif
+    return rc;
+}
+
+/* Walk the address space with VirtualQuery from address 0 and total the bytes found in each state.
+   Regions whose state is none of MEM_FREE / MEM_RESERVE / MEM_COMMIT are not counted. */
+static void vminfo (CHUNK_SIZE_T  *free, CHUNK_SIZE_T  *reserved, CHUNK_SIZE_T  *committed) {
+    MEMORY_BASIC_INFORMATION info;
+    char *cursor = 0;
+    *free = 0;
+    *reserved = 0;
+    *committed = 0;
+    while (VirtualQuery (cursor, &info, sizeof (info))) {
+        if (info.State == MEM_FREE)
+            *free += info.RegionSize;
+        else if (info.State == MEM_RESERVE)
+            *reserved += info.RegionSize;
+        else if (info.State == MEM_COMMIT)
+            *committed += info.RegionSize;
+        /* step to the first byte after the region just reported */
+        cursor = (char *) info.BaseAddress + info.RegionSize;
+    }
+}
+
+/* Report the kernel-mode and user-mode CPU time consumed so far, in milliseconds, by the
+   current process (whole != 0) or by the current thread (whole == 0).  Returns TRUE on
+   success; if the time query fails, *kernel and *user are zeroed and FALSE is returned. */
+static int cpuinfo (int whole, CHUNK_SIZE_T  *kernel, CHUNK_SIZE_T  *user) {
+    /* The FILETIME results are received directly into 64-bit integers */
+    __int64 creation64, exit64, kernel64, user64;
+    int rc;
+    if (whole) {
+        /* whole process */
+        rc = GetProcessTimes (GetCurrentProcess (),
+                              (FILETIME *) &creation64,
+                              (FILETIME *) &exit64,
+                              (FILETIME *) &kernel64,
+                              (FILETIME *) &user64);
+    } else {
+        /* calling thread only */
+        rc = GetThreadTimes (GetCurrentThread (),
+                             (FILETIME *) &creation64,
+                             (FILETIME *) &exit64,
+                             (FILETIME *) &kernel64,
+                             (FILETIME *) &user64);
+    }
+    /* Either call reports failure with a zero return value */
+    if (! rc) {
+        *kernel = 0;
+        *user = 0;
+        return FALSE;
+    }
+    /* FILETIME values count 100 ns ticks: 10000 ticks per millisecond */
+    *kernel = (CHUNK_SIZE_T) (kernel64 / 10000);
+    *user = (CHUNK_SIZE_T) (user64 / 10000);
+    return TRUE;
+}
+
+#endif /* WIN32 */
+
+/* ------------------------------------------------------------
+History:
+    V2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+      * Fix malloc_state bitmap array misdeclaration
+
+    V2.7.1 Thu Jul 25 10:58:03 2002  Doug Lea  (dl at gee)
+      * Allow tuning of FIRST_SORTED_BIN_SIZE
+      * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+      * Better detection and support for non-contiguousness of MORECORE. 
+        Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+      * Bypass most of malloc if no frees. Thanks To Emery Berger.
+      * Fix freeing of old top non-contiguous chunk in sysmalloc.
+      * Raised default trim and map thresholds to 256K.
+      * Fix mmap-related #defines. Thanks to Lubos Lunak.
+      * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+      * Branch-free bin calculation
+      * Default trim and mmap thresholds now 256K.
+
+    V2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+      * Introduce independent_comalloc and independent_calloc.
+        Thanks to Michael Pachos for motivation and help.
+      * Make optional .h file available
+      * Allow > 2GB requests on 32bit systems.
+      * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+        Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+        and Anonymous.
+      * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for 
+        helping test this.)
+      * memalign: check alignment arg
+      * realloc: don't try to shift chunks backwards, since this
+        leads to  more fragmentation in some programs and doesn't
+        seem to help in any others.
+      * Collect all cases in malloc requiring system memory into sYSMALLOc
+      * Use mmap as backup to sbrk
+      * Place all internal state in malloc_state
+      * Introduce fastbins (although similar to 2.5.1)
+      * Many minor tunings and cosmetic improvements
+      * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK 
+      * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+        Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+      * Include errno.h to support default failure action.
+
+    V2.6.6 Sun Dec  5 07:42:19 1999  Doug Lea  (dl at gee)
+      * return null for negative arguments
+      * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+         * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+          (e.g. WIN32 platforms)
+         * Cleanup header file inclusion for WIN32 platforms
+         * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+         * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+           memory allocation routines
+         * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+         * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+           usage of 'assert' in non-WIN32 code
+         * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+           avoid infinite loop
+      * Always call 'fREe()' rather than 'free()'
+
+    V2.6.5 Wed Jun 17 15:57:31 1998  Doug Lea  (dl at gee)
+      * Fixed ordering problem with boundary-stamping
+
+    V2.6.3 Sun May 19 08:17:58 1996  Doug Lea  (dl at gee)
+      * Added pvalloc, as recommended by H.J. Liu
+      * Added 64bit pointer support mainly from Wolfram Gloger
+      * Added anonymously donated WIN32 sbrk emulation
+      * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+      * malloc_extend_top: fix mask error that caused wastage after
+        foreign sbrks
+      * Add linux mremap support code from HJ Liu
+
+    V2.6.2 Tue Dec  5 06:52:55 1995  Doug Lea  (dl at gee)
+      * Integrated most documentation with the code.
+      * Add support for mmap, with help from
+        Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Use last_remainder in more cases.
+      * Pack bins using idea from  colin@nyx10.cs.du.edu
+      * Use ordered bins instead of best-fit threshold
+      * Eliminate block-local decls to simplify tracing and debugging.
+      * Support another case of realloc via move into top
+      * Fix error occurring when initial sbrk_base not word-aligned.
+      * Rely on page size for units instead of SBRK_UNIT to
+        avoid surprises about sbrk alignment conventions.
+      * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+        (raymond@es.ele.tue.nl) for the suggestion.
+      * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+      * More precautions for cases where other routines call sbrk,
+        courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Added macros etc., allowing use in linux libc from
+        H.J. Lu (hjl@gnu.ai.mit.edu)
+      * Inverted this history list
+
+    V2.6.1 Sat Dec  2 14:10:57 1995  Doug Lea  (dl at gee)
+      * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+      * Removed all preallocation code since under current scheme
+        the work required to undo bad preallocations exceeds
+        the work saved in good cases for most test programs.
+      * No longer use return list or unconsolidated bins since
+        no scheme using them consistently outperforms those that don't
+        given above changes.
+      * Use best fit for very large chunks to prevent some worst-cases.
+      * Added some support for debugging
+
+    V2.6.0 Sat Nov  4 07:05:23 1995  Doug Lea  (dl at gee)
+      * Removed footers when chunks are in use. Thanks to
+        Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+    V2.5.4 Wed Nov  1 07:54:51 1995  Doug Lea  (dl at gee)
+      * Added malloc_trim, with help from Wolfram Gloger
+        (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+    V2.5.3 Tue Apr 26 10:16:01 1994  Doug Lea  (dl at g)
+
+    V2.5.2 Tue Apr  5 16:20:40 1994  Doug Lea  (dl at g)
+      * realloc: try to expand in both directions
+      * malloc: swap order of clean-bin strategy;
+      * realloc: only conditionally expand backwards
+      * Try not to scavenge used bins
+      * Use bin counts as a guide to preallocation
+      * Occasionally bin return list chunks in first scan
+      * Add a few optimizations from colin@nyx10.cs.du.edu
+
+    V2.5.1 Sat Aug 14 15:40:43 1993  Doug Lea  (dl at g)
+      * faster bin computation & slightly different binning
+      * merged all consolidations to one part of malloc proper
+         (eliminating old malloc_find_space & malloc_clean_bin)
+      * Scan 2 returns chunks (not just 1)
+      * Propagate failure in realloc if malloc returns 0
+      * Add stuff to allow compilation on non-ANSI compilers
+          from kpv@research.att.com
+
+    V2.5 Sat Aug  7 07:41:59 1993  Doug Lea  (dl at g.oswego.edu)
+      * removed potential for odd address access in prev_chunk
+      * removed dependency on getpagesize.h
+      * misc cosmetics and a bit more internal documentation
+      * anticosmetics: mangled names in macros to evade debugger strangeness
+      * tested on sparc, hp-700, dec-mips, rs6000
+          with gcc & native cc (hp, dec only) allowing
+          Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+    Trial version Fri Aug 28 13:14:29 1992  Doug Lea  (dl at g.oswego.edu)
+      * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+         structure of old version,  but most details differ.)
+
+*/
diff --git a/mini-os/lib/math.c b/mini-os/lib/math.c
new file mode 100644
index 0000000000..29e1cc933e
--- /dev/null
+++ b/mini-os/lib/math.c
@@ -0,0 +1,385 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: math.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description:  Library functions for 64bit arith and other
+ *               from freebsd, files in sys/libkern/ (qdivrem.c, etc)
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ *-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+*/
+
+#include <types.h>
+
+/*
+ * Overlay used to view one 64-bit quad either as a whole (signed or
+ * unsigned) or as its two longs, indexed by the H and L macros below.
+ */
+union uu {
+        s64            q;              /* as a (signed) quad */
+        u64            uq;             /* as an unsigned quad */
+        long           sl[2];          /* as two signed longs */
+        unsigned long  ul[2];          /* as two unsigned longs */
+};
+/* XXX RN: Yuck, hardcoded endianness :)  (ul[1] is the high long, ul[0] the low) */
+#define _QUAD_HIGHWORD 1
+#define _QUAD_LOWWORD 0
+/*
+ * Short names for the high and low longword indices of union uu.
+ */
+#define H               _QUAD_HIGHWORD
+#define L               _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define CHAR_BIT        8               /* number of bits in a char */
+#define QUAD_BITS       (sizeof(s64) * CHAR_BIT)
+#define LONG_BITS       (sizeof(long) * CHAR_BIT)
+#define HALF_BITS       (sizeof(long) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)).  (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x)        ((x) >> HALF_BITS)
+#define LHALF(x)        ((x) & ((1 << HALF_BITS) - 1))
+#define LHUP(x)         ((x) << HALF_BITS)
+
+/*
+ * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+#define	B	(1 << HALF_BITS)	/* digit base */
+
+/* Combine two `digits' to make a single two-digit number. */
+#define	COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff /* NOTE(review): only <types.h> is included; confirm it defines these */
+typedef unsigned short digit;
+#else	/* also selected if the limit macros are undefined (cpp reads them as 0) */
+typedef u_long digit;
+#endif
+
+
+/*
+ * Shift the digits p[0]..p[len] left by `sh' bits in place.  Bits that
+ * `fall out' of p[0] are dropped (there never will be any such anyway).
+ * We may assume len >= 0.  NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+	register digit *last = p + len;
+
+	for (; p < last; p++)	/* pull the top bits of the next digit in */
+		*p = LHALF(*p << sh) | (p[1] >> (HALF_BITS - sh));
+	*p = LHALF(*p << sh);	/* last digit: nothing follows it */
+}
+
+/*
+ * __qdivrem(uq, vq, arq) returns uq/vq and, if arq is non-null, stores
+ * uq%vq in *arq.  A zero divisor is turned into a real division by zero.
+ *
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long.  As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+u64
+__qdivrem(u64 uq, u64 vq, u64 *arq)
+{
+	union uu tmp;
+	digit *u, *v, *q;
+	register digit v1, v2;
+	u_long qhat, rhat, t;
+	int m, n, d, j, i;
+	digit uspace[5], vspace[5], qspace[5];
+
+	/*
+	 * Take care of special cases: divide by zero, and u < v.
+	 */
+	if (vq == 0) {
+		/* divide by zero: force a run-time 1/0 (zero is volatile). */
+		static volatile const unsigned int zero = 0;
+
+		tmp.ul[H] = tmp.ul[L] = 1 / zero;
+		if (arq)
+			*arq = uq;
+		return (tmp.q);
+	}
+	if (uq < vq) {	/* quotient 0, remainder is the whole dividend */
+		if (arq)
+			*arq = uq;
+		return (0);
+	}
+	u = &uspace[0];
+	v = &vspace[0];
+	q = &qspace[0];
+
+	/*
+	 * Break dividend and divisor into digits in base B, then
+	 * count leading zeros to determine m and n.  When done, we
+	 * will have:
+	 *	u = (u[1]u[2]...u[m+n]) sub B
+	 *	v = (v[1]v[2]...v[n]) sub B
+	 *	v[1] != 0
+	 *	1 < n <= 4 (if n = 1, we use a different division algorithm)
+	 *	m >= 0 (otherwise u < v, which we already checked)
+	 *	m + n = 4
+	 * and thus
+	 *	m = 4 - n <= 2
+	 */
+	tmp.uq = uq;
+	u[0] = 0;
+	u[1] = HHALF(tmp.ul[H]);
+	u[2] = LHALF(tmp.ul[H]);
+	u[3] = HHALF(tmp.ul[L]);
+	u[4] = LHALF(tmp.ul[L]);
+	tmp.uq = vq;
+	v[1] = HHALF(tmp.ul[H]);
+	v[2] = LHALF(tmp.ul[H]);
+	v[3] = HHALF(tmp.ul[L]);
+	v[4] = LHALF(tmp.ul[L]);
+	for (n = 4; v[1] == 0; v++) {
+		if (--n == 1) {
+			u_long rbj;	/* r*B+u[j] (not root boy jim) */
+			digit q1, q2, q3, q4;
+
+			/*
+			 * Change of plan, per exercise 16.
+			 *	r = 0;
+			 *	for j = 1..4:
+			 *		q[j] = floor((r*B + u[j]) / v),
+			 *		r = (r*B + u[j]) % v;
+			 * We unroll this completely here.
+			 */
+			t = v[2];	/* nonzero, by definition */
+			q1 = u[1] / t;
+			rbj = COMBINE(u[1] % t, u[2]);
+			q2 = rbj / t;
+			rbj = COMBINE(rbj % t, u[3]);
+			q3 = rbj / t;
+			rbj = COMBINE(rbj % t, u[4]);
+			q4 = rbj / t;
+			if (arq)
+				*arq = rbj % t;
+			tmp.ul[H] = COMBINE(q1, q2);
+			tmp.ul[L] = COMBINE(q3, q4);
+			return (tmp.q);
+		}
+	}
+
+	/*
+	 * By adjusting q once we determine m, we can guarantee that
+	 * there is a complete four-digit quotient at &qspace[1] when
+	 * we finally stop.
+	 */
+	for (m = 4 - n; u[1] == 0; u++)
+		m--;
+	for (i = 4 - m; --i >= 0;)
+		q[i] = 0;
+	q += 4 - m;
+
+	/*
+	 * Here we run Program D, translated from MIX to C and acquiring
+	 * a few minor changes.
+	 *
+	 * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+	 */
+	d = 0;
+	for (t = v[1]; t < B / 2; t <<= 1)
+		d++;
+	if (d > 0) {
+		shl(&u[0], m + n, d);		/* u <<= d */
+		shl(&v[1], n - 1, d);		/* v <<= d */
+	}
+	/*
+	 * D2: j = 0.
+	 */
+	j = 0;
+	v1 = v[1];	/* for D3 -- note that v[1..n] are constant */
+	v2 = v[2];	/* for D3 */
+	do {
+		register digit uj0, uj1, uj2;
+
+		/*
+		 * D3: Calculate qhat (\^q, in TeX notation).
+		 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+		 * let rhat = (u[j]*B + u[j+1]) mod v[1].
+		 * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+		 * decrement qhat and increase rhat correspondingly.
+		 * Note that if rhat >= B, v[2]*qhat < rhat*B.
+		 */
+		uj0 = u[j + 0];	/* for D3 only -- note that u[j+...] change */
+		uj1 = u[j + 1];	/* for D3 only */
+		uj2 = u[j + 2];	/* for D3 only */
+		if (uj0 == v1) {
+			qhat = B;
+			rhat = uj1;
+			goto qhat_too_big;
+		} else {
+			u_long nn = COMBINE(uj0, uj1);
+			qhat = nn / v1;
+			rhat = nn % v1;
+		}
+		while (v2 * qhat > COMBINE(rhat, uj2)) {
+	qhat_too_big:
+			qhat--;
+			if ((rhat += v1) >= B)
+				break;
+		}
+		/*
+		 * D4: Multiply and subtract.
+		 * The variable `t' holds any borrows across the loop.
+		 * We split this up so that we do not require v[0] = 0,
+		 * and to eliminate a final special case.
+		 */
+		for (t = 0, i = n; i > 0; i--) {
+			t = u[i + j] - v[i] * qhat - t;
+			u[i + j] = LHALF(t);
+			t = (B - HHALF(t)) & (B - 1);
+		}
+		t = u[j] - t;
+		u[j] = LHALF(t);
+		/*
+		 * D5: test remainder.
+		 * There is a borrow if and only if HHALF(t) is nonzero;
+		 * in that (rare) case, qhat was too large (by exactly 1).
+		 * Fix it by adding v[1..n] to u[j..j+n].
+		 */
+		if (HHALF(t)) {
+			qhat--;
+			for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+				t += u[i + j] + v[i];
+				u[i + j] = LHALF(t);
+				t = HHALF(t);
+			}
+			u[j] = LHALF(u[j] + t);
+		}
+		q[j] = qhat;
+	} while (++j <= m);		/* D7: loop on j. */
+
+	/*
+	 * If caller wants the remainder, we have to calculate it as
+	 * u[m..m+n] >> d (this is at most n digits and thus fits in
+	 * u[m+1..m+n], but we may need more source digits).
+	 */
+	if (arq) {
+		if (d) {
+			for (i = m + n; i > m; --i)
+				u[i] = (u[i] >> d) |
+				    LHALF(u[i - 1] << (HALF_BITS - d));
+			u[i] = 0;
+		}
+		tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+		tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+		*arq = tmp.q;
+	}
+
+	tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+	tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+	return (tmp.q);
+}
+
+
+/*
+ * Divide two signed quads (64-bit signed division support routine).
+ *
+ * Both operands are reduced to their magnitudes, divided as unsigned quads
+ * by __qdivrem(), and the quotient is negated when exactly one operand was
+ * negative.  The result therefore truncates toward zero (-1/2 gives 0).
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+s64
+__divdi3(s64 a, s64 b)
+{
+	/* the negation is done on the u64 value, never on the s64 one */
+	u64 mag_a = (a < 0) ? -(u64)a : (u64)a;
+	u64 mag_b = (b < 0) ? -(u64)b : (u64)b;
+	int negate = (a < 0) != (b < 0);
+	u64 mag_q;
+
+	mag_q = __qdivrem(mag_a, mag_b, (u64 *)0);
+	/* -mag_q is computed in u64, then converted to the s64 return type */
+	return (negate ? -mag_q : mag_q);
+}
+
+/*
+ * Divide two unsigned quads: returns a / b, discarding the remainder.
+ */
+u64
+__udivdi3(u64 a, u64 b)
+{
+        /* a null remainder pointer tells __qdivrem() not to store a % b */
+        return (__qdivrem(a, b, (u64 *)0));
+}
+
+
+/*
+ * Return remainder after dividing two unsigned quads (a % b).
+ */
+u_quad_t
+__umoddi3(u_quad_t a, u_quad_t b)
+{
+        u64 r;  /* u64 so that &r matches __qdivrem()'s u64 * argument */
+
+        /* only the remainder is wanted; the quotient is discarded */
+        (void)__qdivrem(a, b, &r);
+        return (r);
+}
+
diff --git a/mini-os/lib/printf.c b/mini-os/lib/printf.c
new file mode 100644
index 0000000000..bd7beba2c5
--- /dev/null
+++ b/mini-os/lib/printf.c
@@ -0,0 +1,470 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: printf.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Library functions for printing
+ *              (freebsd port, mainly sys/subr_prf.c)
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ *
+ *-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+ */
+
+#include <os.h>
+#include <types.h>
+#include <hypervisor.h>
+#include <lib.h>
+
+/****************************************************************************
+ * RN: printf family of routines
+ * taken mainly from sys/subr_prf.c
+ ****************************************************************************/
+char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz"; /* digit glyphs for bases up to 36 */
+#define hex2ascii(hex)  (hex2ascii_data[hex])	/* no range check on hex */
+#define NBBY    8               /* number of bits in a byte */
+#define MAXNBUF    (sizeof(quad_t) * NBBY + 1)	/* one char per bit of a quad_t, plus the NUL */
+
+static int kvprintf(char const *fmt, void *arg, int radix, va_list ap);	/* formatter shared by the printf family below */
+
+
+int
+printf(const char *fmt, ...)
+{
+	static char out[1024];	/* kvprintf gets no size limit for this */
+	va_list args;
+	int nchars;
+
+	va_start(args, fmt);
+	nchars = kvprintf(fmt, out, 10, args);
+	va_end(args);
+	out[nchars] = '\0';	/* kvprintf leaves the text unterminated */
+	(void)HYPERVISOR_console_write(out, strlen(out));
+	return nchars;
+}
+
+int
+vprintf(const char *fmt, va_list ap)
+{
+	static char out[1024];	/* kvprintf gets no size limit for this */
+	int nchars = kvprintf(fmt, out, 10, ap);
+
+	out[nchars] = '\0';	/* kvprintf leaves the text unterminated */
+	(void)HYPERVISOR_console_write(out, strlen(out));
+	return nchars;
+}
+
+int
+sprintf(char *buf, const char *cfmt, ...)
+{
+	va_list args;
+	int nchars;
+
+	va_start(args, cfmt);
+	nchars = kvprintf(cfmt, (void *)buf, 10, args);
+	va_end(args);
+	buf[nchars] = '\0';	/* terminator is not part of the count */
+	return nchars;
+}
+
+int
+vsprintf(char *buf, const char *cfmt, va_list ap)
+{
+	/* format straight into the caller's buffer; buf must be big enough */
+	int nchars = kvprintf(cfmt, (void *)buf, 10, ap);
+
+	buf[nchars] = '\0';
+	return nchars;
+}
+
+
+/*
+ * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse
+ * order; return an optional length and a pointer to the last character
+ * written in the buffer (i.e., the first character of the string).
+ * The buffer pointed to by `nbuf' must have length >= MAXNBUF.
+ * nbuf[0] holds the NUL, so callers read the digits backwards from the
+ * returned pointer until they reach it.  If `lenp' is non-null, *lenp is
+ * set to the number of digits.  `base' must be at least 2.
+ */
+static char *
+ksprintn(char *nbuf, u_long ul, int base, int *lenp)
+{
+	char *p;
+
+	p = nbuf;
+	*p = '\0';
+	do {
+		*++p = hex2ascii(ul % base);
+	} while (ul /= base);
+	if (lenp)
+		*lenp = p - nbuf;
+	return (p);
+}
+/*
+ * ksprintn, but for a quad_t: same reversed, NUL-first layout in `nbuf',
+ * same return value and optional digit count through `lenp'.
+ */
+static char *
+ksprintqn(char *nbuf, u_quad_t uq, int base, int *lenp)
+{
+	char *p;
+
+	p = nbuf;
+	*p = '\0';
+	do {
+		*++p = hex2ascii(uq % base);
+	} while (uq /= base);
+	if (lenp)
+		*lenp = p - nbuf;
+	return (p);
+}
+
+/*
+ * Scaled down version of printf(3).
+ *
+ * Two additional formats:
+ *
+ * The format %b is supported to decode error registers.
+ * Its usage is:
+ *
+ *	printf("reg=%b\n", regval, "<base><arg>*");
+ *
+ * where <base> is the output base expressed as a control character, e.g.
+ * \10 gives octal; \20 gives hex.  Each arg is a sequence of characters,
+ * the first of which gives the bit number to be inspected (origin 1), and
+ * the next characters (up to a control character, i.e. a character <= 32),
+ * give the name of the register.  Thus:
+ *
+ *	kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
+ *
+ * would produce output:
+ *
+ *	reg=3<BITTWO,BITONE>
+ *
+ * XXX:  %D  -- Hexdump, takes pointer and separator string:
+ *		("%6D", ptr, ":")   -> XX:XX:XX:XX:XX:XX
+ *		("%*D", len, ptr, " ") -> XX XX XX XX ...
+ */
+
+/* RN: This normally takes a function for output.  Here the text is always
+ * stored in the buffer at `arg' (unbounded, not NUL-terminated); printf and
+ * vprintf then hand it to the console hypercall.  Returns the number of
+ * characters stored.  `radix' is the base used for %r and %n.  Scanning
+ * stops if the format string ends in the middle of a conversion. */
+static int
+kvprintf(char const *fmt, void *arg, int radix, va_list ap)
+{
+#define PCHAR(c) {int cc=(c); *d++ = cc; retval++; }
+
+	char nbuf[MAXNBUF];
+	char *p, *q, *d = (char *) arg;	/* d: next free byte of the output */
+	u_char *up;
+	int ch, n;
+	u_long ul = 0;
+	u_quad_t uq = 0;
+	int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot;
+	int dwidth;
+	char padc;
+	int retval = 0;
+
+	if (fmt == NULL)
+		fmt = "(fmt null)\n";
+
+	if (radix < 2 || radix > 36)
+		radix = 10;
+
+	for (;;) {
+		padc = ' ';
+		width = 0;
+		while ((ch = (u_char)*fmt++) != '%') {
+			if (ch == '\0')
+				return retval;
+			PCHAR(ch);
+		}
+		qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0;
+		sign = 0; dot = 0; dwidth = 0;
+reswitch:	switch (ch = (u_char)*fmt++) {
+		case '.':
+			dot = 1;
+			goto reswitch;
+		case '#':
+			sharpflag = 1;
+			goto reswitch;
+		case '+':
+			sign = 1;
+			goto reswitch;
+		case '-':
+			ladjust = 1;
+			goto reswitch;
+		case '%':
+			PCHAR(ch);
+			break;
+		case '*':
+			if (!dot) {
+				width = va_arg(ap, int);
+				if (width < 0) {
+					ladjust = !ladjust;
+					width = -width;
+				}
+			} else {
+				dwidth = va_arg(ap, int);
+			}
+			goto reswitch;
+		case '0':
+			if (!dot) {
+				padc = '0';
+				goto reswitch;
+			}	/* FALLTHROUGH: after '.', a 0 starts the precision digits */
+		case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+				for (n = 0;; ++fmt) {
+					n = n * 10 + ch - '0';
+					ch = *fmt;
+					if (ch < '0' || ch > '9')
+						break;
+				}
+			if (dot)
+				dwidth = n;
+			else
+				width = n;
+			goto reswitch;
+		case 'b':
+			ul = va_arg(ap, int);
+			p = va_arg(ap, char *);
+			for (q = ksprintn(nbuf, ul, *p++, NULL); *q;)
+				PCHAR(*q--);
+
+			if (!ul)
+				break;
+
+			for (tmp = 0; *p;) {
+				n = *p++;
+				if (ul & (1UL << (n - 1))) {
+					PCHAR(tmp ? ',' : '<');
+					for (; (n = *p) > ' '; ++p)
+						PCHAR(n);
+					tmp = 1;
+				} else
+					for (; *p > ' '; ++p)
+						continue;
+			}
+			if (tmp)
+				PCHAR('>');
+			break;
+		case 'c':
+			PCHAR(va_arg(ap, int));
+			break;
+		case 'D':
+			up = va_arg(ap, u_char *);
+			p = va_arg(ap, char *);
+			if (!width)
+				width = 16;
+			while(width--) {
+				PCHAR(hex2ascii(*up >> 4));
+				PCHAR(hex2ascii(*up & 0x0f));
+				up++;
+				if (width)
+					for (q=p;*q;q++)
+						PCHAR(*q);
+			}
+			break;
+		case 'd':
+			if (qflag)
+				uq = va_arg(ap, quad_t);
+			else if (lflag)
+				ul = va_arg(ap, long);
+			else
+				ul = va_arg(ap, int);
+			sign = 1;
+			base = 10;
+			goto number;
+		case 'l':
+			if (lflag) {
+				lflag = 0;
+				qflag = 1;
+			} else
+				lflag = 1;
+			goto reswitch;
+		case 'o':
+			if (qflag)
+				uq = va_arg(ap, u_quad_t);
+			else if (lflag)
+				ul = va_arg(ap, u_long);
+			else
+				ul = va_arg(ap, u_int);
+			base = 8;
+			goto nosign;
+		case 'p':
+			ul = (uintptr_t)va_arg(ap, void *);
+			base = 16;
+			sharpflag = (width == 0);
+			goto nosign;
+		case 'q':
+			qflag = 1;
+			goto reswitch;
+		case 'n':
+		case 'r':
+			if (qflag)
+				uq = va_arg(ap, u_quad_t);
+			else if (lflag)
+				ul = va_arg(ap, u_long);
+			else
+				ul = sign ?
+				    (u_long)va_arg(ap, int) : va_arg(ap, u_int);
+			base = radix;
+			goto number;
+		case 's':
+			p = va_arg(ap, char *);
+			if (p == NULL)
+				p = "(null)";
+			if (!dot)
+				n = strlen (p);
+			else
+				for (n = 0; n < dwidth && p[n]; n++)
+					continue;
+
+			width -= n;
+
+			if (!ladjust && width > 0)
+				while (width--)
+					PCHAR(padc);
+			while (n--)
+				PCHAR(*p++);
+			if (ladjust && width > 0)
+				while (width--)
+					PCHAR(padc);
+			break;
+		case 'u':
+			if (qflag)
+				uq = va_arg(ap, u_quad_t);
+			else if (lflag)
+				ul = va_arg(ap, u_long);
+			else
+				ul = va_arg(ap, u_int);
+			base = 10;
+			goto nosign;
+		case 'x':
+		case 'X':
+			if (qflag)
+				uq = va_arg(ap, u_quad_t);
+			else if (lflag)
+				ul = va_arg(ap, u_long);
+			else
+				ul = va_arg(ap, u_int);
+			base = 16;
+			goto nosign;
+		case 'z':
+			if (qflag)
+				uq = va_arg(ap, u_quad_t);
+			else if (lflag)
+				ul = va_arg(ap, u_long);
+			else
+				ul = sign ?
+				    (u_long)va_arg(ap, int) : va_arg(ap, u_int);
+			base = 16;
+			goto number;
+nosign:			sign = 0;
+number:			
+			if (qflag) {
+				if (sign && (quad_t)uq < 0) {
+					neg = 1;
+					uq = -(quad_t)uq;
+				}
+				p = ksprintqn(nbuf, uq, base, &tmp);
+			} else {
+				if (sign && (long)ul < 0) {
+					neg = 1;
+					ul = -(long)ul;
+				}
+				p = ksprintn(nbuf, ul, base, &tmp);
+			}
+			if (sharpflag && (qflag ? uq != 0 : ul != 0)) {
+				if (base == 8)
+					tmp++;
+				else if (base == 16)
+					tmp += 2;
+			}
+			if (neg)
+				tmp++;
+
+			if (!ladjust && width && (width -= tmp) > 0)
+				while (width--)
+					PCHAR(padc);
+			if (neg)
+				PCHAR('-');
+			if (sharpflag && (qflag ? uq != 0 : ul != 0)) {
+				if (base == 8) {
+					PCHAR('0');
+				} else if (base == 16) {
+					PCHAR('0');
+					PCHAR('x');
+				}
+			}
+
+			while (*p)
+				PCHAR(*p--);
+
+			if (ladjust && width && (width -= tmp) > 0)
+				while (width--)
+					PCHAR(padc);
+
+			break;
+		default:
+			PCHAR('%');
+			if (lflag)
+				PCHAR('l');
+			if (ch == '\0')	/* format ended inside a conversion: */
+				return retval;	/* do not scan past its NUL */
+			PCHAR(ch);
+			break;
+		}
+	}
+#undef PCHAR
+}
+
diff --git a/mini-os/lib/string.c b/mini-os/lib/string.c
new file mode 100644
index 0000000000..c1b9dbfc8e
--- /dev/null
+++ b/mini-os/lib/string.c
@@ -0,0 +1,142 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: string.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Library function for string and memory manipulation
+ *              Origin unknown
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <os.h>
+#include <types.h>
+#include <lib.h>
+
+int memcmp(const void * cs,const void * ct,size_t count)
+{
+	const unsigned char *su1 = cs, *su2 = ct;
+	/* Bytes compare as unsigned char.  The difference is returned as an
+	 * int: narrowing it to signed char flips its sign (0x80 - 0x00). */
+	for (; count > 0; ++su1, ++su2, --count)
+		if (*su1 != *su2)
+			return *su1 - *su2;
+	return 0;
+}
+
+void * memcpy(void * dest,const void *src,size_t count)
+{
+	char *out = (char *) dest, *in = (char *) src;
+	size_t i;
+
+	for (i = 0; i < count; i++)	/* byte-wise, ascending addresses */
+		out[i] = in[i];
+	return dest;
+}
+
+int strncmp(const char * cs,const char * ct,size_t count)
+{
+	const unsigned char *a = (const unsigned char *) cs;
+	const unsigned char *b = (const unsigned char *) ct;
+
+	/* Characters compare as unsigned char (as ISO C requires); the int
+	 * difference keeps the sign that a signed char result could flip. */
+	for (; count; a++, b++, count--)
+		if (*a != *b || *a == '\0')
+			return *a - *b;
+	return 0;
+}
+
+int strcmp(const char * cs,const char * ct)
+{
+        const unsigned char *a = (const unsigned char *) cs;
+        const unsigned char *b = (const unsigned char *) ct;
+
+        /* Compare as unsigned char and return the int difference; a
+         * signed char result gets the sign wrong, e.g. "\x80" vs "". */
+        while (*a == *b && *a != '\0')
+                a++, b++;
+        return *a - *b;
+}
+
+char * strcpy(char * dest,const char *src)
+{
+        size_t i;
+
+        for (i = 0; (dest[i] = src[i]) != '\0'; i++)
+                /* copies up to and including the NUL */;
+        return dest;
+}
+
+char * strncpy(char * dest,const char *src,size_t count)
+{
+        size_t i;
+        for (i = 0; i < count && src[i] != '\0'; i++)
+                dest[i] = src[i];
+        for (; i < count; i++)          /* ISO C: NUL-pad up to count */
+                dest[i] = '\0';
+        return dest;                    /* unterminated if src fills count */
+}
+
+void * memset(void * s,int c,size_t count)
+{
+        char *p = (char *) s;
+        char *end = p + count;
+
+        while (p != end)                /* c is narrowed to char on store */
+                *p++ = c;
+        return s;
+}
+
+size_t strnlen(const char * s, size_t count)
+{
+        size_t len = 0;
+
+        while (len < count && s[len] != '\0')   /* never reads past count */
+                len++;
+        return len;
+}
+
+size_t strlen(const char * s)
+{
+	size_t len = 0;
+
+	while (s[len] != '\0')		/* stop at, and do not count, the NUL */
+		len++;
+	return len;
+}
+
+char * strchr(const char * s, int c)
+{
+        while (*s != (char) c)          /* c == '\0' finds the terminator */
+                if (*s++ == '\0')
+                        return NULL;
+        return (char *) s;
+}
+
+char * strstr(const char * s1,const char * s2)
+{
+        int needle_len = strlen(s2);
+        int remaining;
+
+        if (needle_len == 0)            /* empty needle matches at once */
+                return (char *) s1;
+
+        /* slide over s1 while a full needle still fits in what is left */
+        for (remaining = strlen(s1); remaining >= needle_len; remaining--) {
+                if (memcmp(s1, s2, needle_len) == 0)
+                        return (char *) s1;
+                s1++;
+        }
+        return NULL;
+}
+
diff --git a/mini-os/mm.c b/mini-os/mm.c
new file mode 100644
index 0000000000..8ded35cc66
--- /dev/null
+++ b/mini-os/mm.c
@@ -0,0 +1,375 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: mm.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Aug 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: memory management related functions
+ *              contains buddy page allocator from Xen.
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+
+#include <os.h>
+#include <hypervisor.h>
+#include <mm.h>
+#include <types.h>
+#include <lib.h>
+
+unsigned long *phys_to_machine_mapping;
+extern char *stack;
+extern char _text, _etext, _edata, _end;
+
+static void init_page_allocator(unsigned long min, unsigned long max);
+
+void init_mm(void)
+{
+    /* Work out which page frames are free and hand them to the page allocator. */
+    unsigned long start_pfn, max_pfn, max_free_pfn;
+
+    unsigned long *pgd = (unsigned long *)start_info.pt_base;
+
+    printk("MM: Init\n");
+
+    printk("  _text:        %p\n", &_text);
+    printk("  _etext:       %p\n", &_etext);
+    printk("  _edata:       %p\n", &_edata);
+    printk("  stack start:  %p\n", &stack);
+    printk("  _end:         %p\n", &_end);
+
+    /* set up minimal memory infos */
+    start_pfn = PFN_UP(__pa(&_end));
+    max_pfn = start_info.nr_pages;
+
+    printk("  start_pfn:    %lx\n", start_pfn);
+    printk("  max_pfn:      %lx\n", max_pfn);
+
+    /*
+     * We know where free memory starts (start_pfn) and how many pages
+     * we have (max_pfn).
+     *
+     * Currently the hypervisor stores the page tables it provides in the
+     * high region of this memory range.
+     *
+     * Next we work out how far down this goes (max_free_pfn).
+     *
+     * XXX this assumes the hypervisor provided page tables to be in
+     * the upper region of our initial memory. I don't know if this
+     * is always true.
+     */
+
+    max_free_pfn = PFN_DOWN(__pa(pgd));   /* start from the page directory's own frame */
+    {
+        unsigned long *pgd = (unsigned long *)start_info.pt_base;   /* shadows the outer pgd */
+        unsigned long  pte;
+        int i;
+        printk("  pgd(pa(pgd)): %lx(%lx)", (u_long)pgd, __pa(pgd));
+
+        for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ )
+        {
+            unsigned long pgde = *pgd++;
+            if ( !(pgde & 1) ) continue;   /* skip entries with bit 0 clear (presumably "not present") */
+            pte = machine_to_phys(pgde & PAGE_MASK);
+            printk("  PT(%x): %lx(%lx)", i, (u_long)__va(pte), pte);
+            if (PFN_DOWN(pte) <= max_free_pfn) 
+                max_free_pfn = PFN_DOWN(pte);   /* remember the lowest page-table frame seen */
+        }
+    }
+    max_free_pfn--;   /* step to the frame just below the lowest page-table frame */
+    printk("  max_free_pfn: %lx\n", max_free_pfn);
+
+    /*
+     * now we can initialise the page allocator
+     */
+    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
+           (u_long)__va(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), 
+           (u_long)__va(PFN_PHYS(max_free_pfn)), PFN_PHYS(max_free_pfn));
+    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_free_pfn));   
+
+
+    /* TODO: the physical->machine mapping table is not initialised here yet. */
+
+
+    printk("MM: done\n");
+
+    
+}
+
+/*********************
+ * ALLOCATION BITMAP
+ *  One bit per page of memory. Bit set => page is allocated.
+ */
+
+static unsigned long *alloc_bitmap;
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+/* Non-zero iff page _pn is marked allocated; 1UL keeps the mask word-wide. */
+#define allocated_in_map(_pn) \
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))
+
+
+/*
+ * Hint regarding bitwise arithmetic in map_{alloc,free}:
+ *  -(1<<n)  sets all bits >= n. 
+ *  (1<<n)-1 sets all bits <  n.
+ * Variable names in map_{alloc,free}:
+ *  *_idx == Index into `alloc_bitmap' array.
+ *  *_off == Bit offset within an element of the `alloc_bitmap' array.
+ */
+
+static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+    /* Mark pages [first_page, first_page+nr_pages) as allocated (bits set). */
+    curr_idx  = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+    /* Masks use 1UL so they are as wide as a bitmap word (no int overflow). */
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
+    }
+    else 
+    {
+        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
+        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
+        /* Skip the last word when the range ends on a word boundary. */
+        if ( end_off != 0 ) alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
+    }
+}
+
+
+static void map_free(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+    /* Mark pages [first_page, first_page+nr_pages) as free (bits cleared). */
+    curr_idx = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+    /* Masks use 1UL so they are as wide as a bitmap word (no int overflow). */
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
+    }
+    else 
+    {
+        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
+        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
+        /* Skip the last word when the range ends on a word boundary. */
+        if ( end_off != 0 ) alloc_bitmap[curr_idx] &= -(1UL<<end_off);
+    }
+}
+
+
+
+/*************************
+ * BINARY BUDDY ALLOCATOR
+ */
+
+typedef struct chunk_head_st chunk_head_t;
+typedef struct chunk_tail_st chunk_tail_t;
+
+struct chunk_head_st {
+    chunk_head_t  *next;    /* next free chunk on the same free list */
+    chunk_head_t **pprev;   /* address of the pointer that points at this chunk */
+    int            level;   /* order of the chunk: it spans 2^level pages */
+};
+
+struct chunk_tail_st {
+    int level;              /* same order value, stored at the very end of the chunk */
+};
+
+/* Linked lists of free chunks of different powers-of-two in size. */
+#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
+static chunk_head_t *free_head[FREELIST_SIZE];   /* first chunk of each order's list */
+static chunk_head_t  free_tail[FREELIST_SIZE];   /* per-order sentinel; its next stays NULL */
+#define FREELIST_EMPTY(_l) ((_l)->next == NULL)  /* true when _l is the sentinel itself */
+
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+
+/*
+ * Initialise allocator, placing [@min,@max) in the free pool (@max excluded).
+ * @min and @max are PHYSICAL addresses; the bitmap is carved out at @min.
+ */
+static void init_page_allocator(unsigned long min, unsigned long max)
+{
+    int i;
+    unsigned long range, bitmap_size;
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+
+    for ( i = 0; i < FREELIST_SIZE; i++ )
+    {
+        free_head[i]       = &free_tail[i];
+        free_tail[i].pprev = &free_head[i];
+        free_tail[i].next  = NULL;
+    }
+
+    min = round_pgup  (min);
+    max = round_pgdown(max);
+
+    /* Allocate space for the allocation bitmap. */
+    bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
+    bitmap_size  = round_pgup(bitmap_size);
+    alloc_bitmap = (unsigned long *)__va(min);
+    min         += bitmap_size;
+    /* Avoid unsigned underflow if the bitmap used up the whole range. */
+    range        = (max > min) ? (max - min) : 0;
+
+    /* All allocated by default. */
+    memset(alloc_bitmap, ~0, bitmap_size);
+    /* Free up the memory we've been given to play with. */
+    map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);
+
+    /* The buddy lists are addressed in high memory. */
+    min += PAGE_OFFSET;
+    max += PAGE_OFFSET;
+
+    while ( range != 0 )
+    {
+        /*
+         * Next chunk is limited by alignment of min, but also
+         * must not be bigger than remaining range.
+         */
+        for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ )
+            if ( min & (1UL<<i) ) break;
+
+        /* Head goes at the chunk's start, tail at its very end (1UL: no int overflow). */
+        ch = (chunk_head_t *)min;
+        min   += (1UL<<i);
+        range -= (1UL<<i);
+        ct = (chunk_tail_t *)min-1;
+        i -= PAGE_SHIFT;
+        ch->level       = i;
+        ch->next        = free_head[i];
+        ch->pprev       = &free_head[i];
+        ch->next->pprev = &ch->next;
+        free_head[i]    = ch;
+        ct->level       = i;
+    }
+}
+
+
+/* Hand the whole pages inside PHYSICAL range [@min,@max) to the allocator. */
+void release_bytes_to_allocator(unsigned long min, unsigned long max)
+{
+    unsigned long va     = round_pgup  (min) + PAGE_OFFSET;
+    unsigned long va_end = round_pgdown(max) + PAGE_OFFSET;
+
+    /* Release one page (order 0) at a time by its virtual address. */
+    for ( ; va < va_end; va += PAGE_SIZE )
+    {
+        __free_pages(va, 0);
+    }
+}
+
+
+/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address, 0 on failure. */
+unsigned long __get_free_pages(int order)
+{
+    int i;
+    chunk_head_t *alloc_ch, *spare_ch;
+    chunk_tail_t            *spare_ct;
+
+    /* A negative order would index before free_head[]; treat it as a failure. */
+    if ( order < 0 ) goto no_memory;
+
+    /* Find smallest order which can satisfy the request. */
+    for ( i = order; i < FREELIST_SIZE; i++ )
+        if ( !FREELIST_EMPTY(free_head[i]) ) break;
+
+    if ( i >= FREELIST_SIZE ) goto no_memory;
+
+    /* Unlink a chunk. */
+    alloc_ch = free_head[i];
+    free_head[i] = alloc_ch->next;
+    alloc_ch->next->pprev = alloc_ch->pprev;
+
+    /* We may have to break the chunk a number of times. */
+    while ( i != order )
+    {
+        /* Split into two equal parts. */
+        i--;
+        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT)));
+        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1;
+
+        /* Create new header for spare chunk. */
+        spare_ch->level = i;
+        spare_ch->next  = free_head[i];
+        spare_ch->pprev = &free_head[i];
+        spare_ct->level = i;
+
+        /* Link in the spare chunk. */
+        spare_ch->next->pprev = &spare_ch->next;
+        free_head[i] = spare_ch;
+    }
+
+    map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1UL<<order);
+
+    return((unsigned long)alloc_ch);
+
+ no_memory:
+
+    printk("Cannot handle page request order %d!\n", order);
+
+    return 0;
+}
+
+
+/* Free 2^@order pages at VIRTUAL address @p, merging with free buddies. */
+void __free_pages(unsigned long p, int order)
+{
+    unsigned long size = 1UL << (order + PAGE_SHIFT);
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+    unsigned long pagenr = __pa(p) >> PAGE_SHIFT;
+
+    map_free(pagenr, 1UL<<order);
+
+    /* Merge chunks as far as possible. */
+    for ( ; ; )
+    {
+        if ( (p & size) )
+        {
+            /* Merge with predecessor block? */
+            if ( allocated_in_map(pagenr-1) ) break;
+            ct = (chunk_tail_t *)p - 1;
+            if ( ct->level != order ) break;
+            ch = (chunk_head_t *)(p - size);
+            p -= size;
+            /* Keep pagenr in step with p for the next round's bitmap tests. */
+            pagenr -= 1UL<<order;
+        }
+        else
+        {
+            /* Merge with successor block? */
+            if ( allocated_in_map(pagenr+(1UL<<order)) ) break;
+            ch = (chunk_head_t *)(p + size);
+            if ( ch->level != order ) break;
+        }
+        /* Okay, unlink the neighbour. */
+        *ch->pprev = ch->next;
+        ch->next->pprev = ch->pprev;
+        order++;
+        size <<= 1;
+    }
+
+    /* Okay, add the final chunk to the appropriate free list. */
+    ch = (chunk_head_t *)p;
+    ct = (chunk_tail_t *)(p+size)-1;
+    ct->level = order;
+    ch->level = order;
+    ch->pprev = &free_head[order];
+    ch->next  = free_head[order];
+    ch->next->pprev = &ch->next;
+    free_head[order] = ch;
+}
diff --git a/mini-os/time.c b/mini-os/time.c
new file mode 100644
index 0000000000..ff23b2ee63
--- /dev/null
+++ b/mini-os/time.c
@@ -0,0 +1,149 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ *        File: time.c
+ *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *     Changes: 
+ *              
+ *        Date: Jul 2003
+ * 
+ * Environment: Xen Minimal OS
+ * Description: Simple time and timer functions
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+
+#include <os.h>
+#include <types.h>
+#include <hypervisor.h>
+#include <events.h>
+#include <time.h>
+#include <lib.h>
+
+/************************************************************************
+ * Time functions
+ *************************************************************************/
+
+static unsigned int rdtsc_bitshift;      /* right shift applied to the raw TSC value */
+static u32      st_scale_f;              /* low 32 bits of the computed scale factor */
+static u32      st_scale_i;              /* scale factor bits above the low 32 */
+static u32      shadow_st_pcc;           /* copy of shared_info->st_timestamp */
+static s_time_t shadow_st;               /* copy of shared_info->system_time */
+static u32      shadow_wc_version=0;     /* wc_version the shadow copies were taken at */
+static long     shadow_tv_sec;           /* copy of shared_info->tv_sec */
+static long     shadow_tv_usec;          /* copy of shared_info->tv_usec */
+static s_time_t shadow_wc_timestamp;     /* copy of shared_info->wc_timestamp */
+
+/*
+ * System time.
+ * The values from the shared info page are copied into the shadow_*
+ * variables and re-read until wc_version stops changing, so that a
+ * consistent snapshot is used. Clashes should be very rare.
+ */
+inline s_time_t get_s_time(void)
+{
+    s32 delta_tsc;
+    u32 low;
+    u64 delta, tsc;
+    u32	version;
+    u64 cpu_freq, scale;
+
+    /* check if our values are still up-to-date */
+    while ( (version = HYPERVISOR_shared_info->wc_version) != 
+            shadow_wc_version )
+    {
+        barrier();
+
+        shadow_wc_version   = version;
+        shadow_tv_sec       = HYPERVISOR_shared_info->tv_sec;
+        shadow_tv_usec      = HYPERVISOR_shared_info->tv_usec;
+        shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp;
+        shadow_st_pcc       = HYPERVISOR_shared_info->st_timestamp;
+        shadow_st           = HYPERVISOR_shared_info->system_time;
+
+        rdtsc_bitshift      = HYPERVISOR_shared_info->rdtsc_bitshift;
+        cpu_freq            = HYPERVISOR_shared_info->cpu_freq;
+
+        /* XXX cpu_freq as u32 limits it to 4.29 GHz. Get a better do_div! */
+        scale = 1000000000LL << (32 + rdtsc_bitshift);
+        scale /= cpu_freq;   /* NOTE(review): no guard against cpu_freq == 0 */
+        st_scale_f = scale & 0xffffffff;   /* low 32 bits of the scale */
+        st_scale_i = scale >> 32;          /* remaining upper bits of the scale */
+
+        barrier();
+	}
+
+    rdtscll(tsc);
+    low = (u32)(tsc >> rdtsc_bitshift);
+    delta_tsc = (s32)(low - shadow_st_pcc);   /* shifted-TSC ticks since the snapshot */
+    if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;   /* clamp a negative delta to zero */
+    delta = ((u64)delta_tsc * st_scale_f);   /* contribution of the low scale word... */
+    delta >>= 32;                            /* ...reduced by 2^32 */
+    delta += ((u64)delta_tsc * st_scale_i);  /* plus contribution of the upper scale word */
+
+    return shadow_st + delta;
+}
+
+
+/*
+ * Wallclock time.
+ * Based on what the hypervisor tells us, extrapolated using system time.
+ * get_s_time() refreshes the shadow_* snapshot this relies on, so it must
+ * be called before the shadow values are read.
+ */
+void gettimeofday(struct timeval *tv)
+{
+    long          usec, sec;
+    u64           now, delta_us, delta_s;
+
+    now      = get_s_time();
+    /* Divide in 64 bits: truncating the ns delta to 32 bits wraps at ~4.29s. */
+    delta_us = (u64)(now - shadow_wc_timestamp) / 1000;
+    delta_s  = delta_us / 1000000;
+    sec      = shadow_tv_sec  + (long)delta_s;
+    usec     = shadow_tv_usec + (long)(delta_us - delta_s * 1000000);
+    while ( usec >= 1000000 ) 
+    {
+        usec -= 1000000;
+        sec++;
+    }
+    tv->tv_sec = sec;
+    tv->tv_usec = usec;
+}
+
+
+static void timer_handler(int ev, struct pt_regs *regs)
+{
+    static int ticks;   /* timer events seen since the last report */
+    s_time_t   now;
+    /* Only report the system time on every 1000th invocation. */
+    if ( ++ticks < 1000 )
+        return;
+
+    now = get_s_time();
+    printf("T(%lld)\n", now);
+    ticks = 0;
+}
+
+
+void init_time(void)
+{
+    u64           freq;
+    unsigned long cpu_khz;
+
+    /* cpu_freq / 1000 is kept as kHz, then split into MHz and a remainder. */
+    freq    = HYPERVISOR_shared_info->cpu_freq;
+    cpu_khz = (u32) (freq/1000);
+    printk("Xen reported: %lu.%03lu MHz processor.\n", 
+           cpu_khz / 1000, cpu_khz % 1000);
+
+    /* Register timer_handler for EV_TIMER, then enable the event. */
+    add_ev_action(EV_TIMER, &timer_handler);
+    enable_ev_action(EV_TIMER);
+    enable_hypervisor_event(EV_TIMER);
+}
diff --git a/mini-os/traps.c b/mini-os/traps.c
new file mode 100644
index 0000000000..970efdd3a6
--- /dev/null
+++ b/mini-os/traps.c
@@ -0,0 +1,150 @@
+
+#include <os.h>
+#include <hypervisor.h>
+#include <lib.h>
+
+/*
+ * These are assembler stubs in entry.S.
+ * They are the actual entry points for virtual exceptions.
+ */
+void divide_error(void);
+void debug(void);
+void int3(void);
+void overflow(void);
+void bounds(void);
+void invalid_op(void);
+void device_not_available(void);
+void double_fault(void);
+void coprocessor_segment_overrun(void);
+void invalid_TSS(void);
+void segment_not_present(void);
+void stack_segment(void);
+void general_protection(void);
+void page_fault(void);
+void coprocessor_error(void);
+void simd_coprocessor_error(void);
+void alignment_check(void);
+void spurious_interrupt_bug(void);
+void machine_check(void);
+
+/*
+ * C handlers here have their parameter-list constructed by the
+ * assembler stubs above. Each one gets a pointer to a list
+ * of register values (to be restored at end of exception).
+ * Some will also receive an error code -- this is the code that
+ * was generated by the processor for the underlying real exception. 
+ * 
+ * Note that the page-fault exception is special. It also receives
+ * the faulting linear address. Normally this would be found in
+ * register CR2, but that is not accessible in a virtualised OS.
+ */
+
+static inline void do_trap(int trapnr, char *str,
+                           struct pt_regs * regs, long error_code)
+{
+    printk("Trap\n");   /* common stub: trapnr, str, regs and error_code are unused */
+}
+
+/* Emit do_<name>(): a C-level handler that just forwards to do_trap(). */
+#define DO_ERROR(trapnr, str, name) \
+void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	do_trap(trapnr, str, regs, error_code); \
+}
+
+/*
+ * Same handler as DO_ERROR; the sicode and siaddr arguments are ignored.
+ */
+#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) DO_ERROR(trapnr, str, name)
+
+DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip)
+DO_ERROR( 3, "int3", int3)
+DO_ERROR( 4, "overflow", overflow)
+DO_ERROR( 5, "bounds", bounds)
+DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_ERROR( 7, "device not available", device_not_available)
+DO_ERROR( 8, "double fault", double_fault)
+DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, "invalid TSS", invalid_TSS)
+DO_ERROR(11, "segment not present", segment_not_present)
+DO_ERROR(12, "stack segment", stack_segment)
+DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR(18, "machine check", machine_check)
+
+void do_page_fault(struct pt_regs * regs, long error_code,
+                   unsigned long address)
+{
+    printk("Page fault\n");   /* stub: regs, error_code and address are not examined */
+}
+
+void do_general_protection(struct pt_regs * regs, long error_code)
+{
+    printk("GPF\n");   /* stub: only reports that the fault happened */
+}
+
+
+void do_debug(struct pt_regs * regs, long error_code)
+{
+    printk("Debug exception\n");
+#define TF_MASK 0x100
+    regs->eflags &= ~TF_MASK;   /* clear bit 0x100 (TF_MASK) in the saved eflags */
+}
+
+
+
+void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+    printk("Copro error\n");   /* stub: only reports that the error happened */
+}
+
+void simd_math_error(void *eip)
+{
+    printk("SIMD error\n");   /* stub: eip is not used */
+}
+
+void do_simd_coprocessor_error(struct pt_regs * regs,
+					  long error_code)
+{
+    printk("SIMD copro error\n");   /* stub: does not call simd_math_error() */
+}
+
+void do_spurious_interrupt_bug(struct pt_regs * regs,
+					  long error_code)
+{   /* no action is taken for this vector */
+}
+
+/*
+ * Submit a virtual IDT to the hypervisor. This consists of tuples
+ * (interrupt vector, privilege ring, CS:EIP of handler).
+ * The 'privilege ring' field specifies the least-privileged ring that
+ * can trap to that vector using a software-interrupt instruction (INT).
+ */
+static trap_info_t trap_table[] = {
+    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
+    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
+    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },   /* vector 2 has no entry */
+    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
+    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
+    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
+    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
+    {  8, 0, __KERNEL_CS, (unsigned long)double_fault                },
+    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
+    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
+    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
+    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
+    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
+    { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug      },
+    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
+    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
+    { 18, 0, __KERNEL_CS, (unsigned long)machine_check               },
+    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
+    {  0, 0,           0, 0                           }   /* all-zero entry; presumably the end marker - TODO confirm */
+};
+    
+
+
+void trap_init(void)
+{
+    HYPERVISOR_set_trap_table(trap_table);    /* pass trap_table[] to the hypervisor */
+}
diff --git a/mini-os/vmlinux.lds b/mini-os/vmlinux.lds
new file mode 100644
index 0000000000..7c4c4f8e9c
--- /dev/null
+++ b/mini-os/vmlinux.lds
@@ -0,0 +1,82 @@
+/* ld script to make the i386 Mini-OS image (taken from the i386 Linux kernel
+ * script written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>);
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+  . = 0xC0000000 + 0x000000;
+  _text = .;			/* Text and read-only data */
+  .text : {
+	*(.text)
+	*(.fixup)
+	*(.gnu.warning)
+	} = 0x9090
+
+  _etext = .;			/* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);		/* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;	/* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  .data : {			/* Data */
+	*(.data)
+	CONSTRUCTORS
+	}
+
+  _edata = .;			/* End of data section */
+
+  . = ALIGN(8192);		/* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);		/* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  __bss_start = .;		/* BSS */
+  .bss : {
+	*(.bss)
+	}
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+	*(.text.exit)
+	*(.data.exit)
+	*(.exitcall.exit)
+	}
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
-- 
cgit v1.2.3