From f2c4c1d4e811964ff9fc87101f0804cf10f9789e Mon Sep 17 00:00:00 2001 From: "rneugeba@wyvis.research" Date: Mon, 6 Oct 2003 11:26:01 +0000 Subject: bitkeeper revision 1.478.1.1 (3f815149k7sE-z_IK6MG5eHi34m-Qg) Minimal guest OS (based on some old code from Keir) --- .bk-to-hg | 8 + .hg-to-bk | 7 + .rootkeys | 27 + mini-os/Makefile | 40 + mini-os/README | 35 + mini-os/entry.S | 348 +++ mini-os/events.c | 106 + mini-os/h/events.h | 53 + mini-os/h/hypervisor.h | 266 +++ mini-os/h/lib.h | 129 ++ mini-os/h/list.h | 164 ++ mini-os/h/mm.h | 107 + mini-os/h/os.h | 270 +++ mini-os/h/time.h | 59 + mini-os/h/types.h | 41 + mini-os/head.S | 46 + mini-os/hypervisor.c | 83 + mini-os/kernel.c | 115 + mini-os/lib/malloc.c | 5700 ++++++++++++++++++++++++++++++++++++++++++++++++ mini-os/lib/math.c | 385 ++++ mini-os/lib/printf.c | 470 ++++ mini-os/lib/string.c | 142 ++ mini-os/mm.c | 375 ++++ mini-os/time.c | 149 ++ mini-os/traps.c | 150 ++ mini-os/vmlinux.lds | 82 + 26 files changed, 9357 insertions(+) create mode 100644 mini-os/Makefile create mode 100644 mini-os/README create mode 100644 mini-os/entry.S create mode 100644 mini-os/events.c create mode 100644 mini-os/h/events.h create mode 100644 mini-os/h/hypervisor.h create mode 100644 mini-os/h/lib.h create mode 100644 mini-os/h/list.h create mode 100644 mini-os/h/mm.h create mode 100644 mini-os/h/os.h create mode 100644 mini-os/h/time.h create mode 100644 mini-os/h/types.h create mode 100644 mini-os/head.S create mode 100644 mini-os/hypervisor.c create mode 100644 mini-os/kernel.c create mode 100644 mini-os/lib/malloc.c create mode 100644 mini-os/lib/math.c create mode 100644 mini-os/lib/printf.c create mode 100644 mini-os/lib/string.c create mode 100644 mini-os/mm.c create mode 100644 mini-os/time.c create mode 100644 mini-os/traps.c create mode 100644 mini-os/vmlinux.lds diff --git a/.bk-to-hg b/.bk-to-hg index a41913bd35..f76cff0b51 100755 --- a/.bk-to-hg +++ b/.bk-to-hg @@ -1,5 +1,13 @@ #!/bin/sh -x set -e +test -L mini-os/h/hypervisor-ifs/block.h +rm mini-os/h/hypervisor-ifs/block.h +test -L mini-os/h/hypervisor-ifs/hypervisor-if.h +rm mini-os/h/hypervisor-ifs/hypervisor-if.h +test -L mini-os/h/hypervisor-ifs/kbd.h +rm mini-os/h/hypervisor-ifs/kbd.h +test -L mini-os/h/hypervisor-ifs/network.h +rm mini-os/h/hypervisor-ifs/network.h test -L xenolinux-sparse rm xenolinux-sparse (find -depth -type d -print | xargs -r rmdir 2>/dev/null) || true diff --git a/.hg-to-bk b/.hg-to-bk index f74c6a785b..714544304a 100755 --- a/.hg-to-bk +++ b/.hg-to-bk @@ -1,5 +1,12 @@ #!/bin/sh -x set -e +mkdir -p mini-os +mkdir -p mini-os/h +mkdir -p mini-os/h/hypervisor-ifs +ln -s ../../../xen/include/hypervisor-ifs/block.h mini-os/h/hypervisor-ifs/block.h +ln -s ../../../xen/include/hypervisor-ifs/hypervisor-if.h mini-os/h/hypervisor-ifs/hypervisor-if.h +ln -s ../../../xen/include/hypervisor-ifs/kbd.h mini-os/h/hypervisor-ifs/kbd.h +ln -s ../../../xen/include/hypervisor-ifs/network.h mini-os/h/hypervisor-ifs/network.h ln -s xenolinux-2.4.22-sparse xenolinux-sparse (find -depth -type d -print | xargs -r rmdir 2>/dev/null) || true exit 0 diff --git a/.rootkeys b/.rootkeys index 7f3ce574b9..1e7a841a52 100644 --- a/.rootkeys +++ b/.rootkeys @@ -6,6 +6,33 @@ 3f5ef5a24IaQasQE2tyMxrfxskMmvw README 3f5ef5a2l4kfBYSQTUaOyyD76WROZQ README.CD 3f69d8abYB1vMyD_QVDvzxy5Zscf1A TODO +3f815144d1vI2777JI-dO4wk49Iw7g mini-os/Makefile +3f815144zTnCV5591ulIJQrpe5b-5Q mini-os/README +3f815144wiiDekmfMl9LIPIvhR83Uw mini-os/entry.S +3f815144r7AHj8GPvc3Nl1L9OSsWIg mini-os/events.c +3f815144h-Chna6E38yo40jqU95G1Q mini-os/h/events.h +3f815144oqr2OlUDzE2GfkKX5Hcxqg mini-os/h/hypervisor-ifs/block.h +3f8151443nGXvfUTFG67VXOIH8P4lg mini-os/h/hypervisor-ifs/hypervisor-if.h +3f81514417ZlYqiRdM_AHPy7G11htA mini-os/h/hypervisor-ifs/kbd.h +3f815144J3ZfU5am03Td7Wjfrz30qQ mini-os/h/hypervisor-ifs/network.h +3f8151445bYdgThGHQPeOW49PsrJ_A mini-os/h/hypervisor.h +3f815144f2Vg3qb6tiwt2VZad-DWsg mini-os/h/lib.h +3f815144iqXtdYup_pyfPSmDZuvZcg mini-os/h/list.h +3f81514437EzzRWAnZl4_Ej1oznMjg mini-os/h/mm.h +3f815144nbSjjT1h4m99-QPbeSWY0Q mini-os/h/os.h +3f815144L1t0AevJt2JDXPegv6JTrw mini-os/h/time.h +3f815144UxddtL0ICCKisN-NDHNFaA mini-os/h/types.h +3f815145W2mamPMclRLOzm5B38vWUQ mini-os/head.S +3f815145LqcH11TCEZbAvcjarckkJw mini-os/hypervisor.c +3f815145vwnmxhCwN7dMRWv_XFtXbg mini-os/kernel.c +3f8151451k5emQAlRe80JdIvfSN4VA mini-os/lib/malloc.c +3f815145Mb9WSKjOPsYTLsPIvPyy4Q mini-os/lib/math.c +3f8151454rEuPjN74V2Bcu65RLnM-Q mini-os/lib/printf.c +3f815145MQZrUJV0iRmTK2KIhwB2wg mini-os/lib/string.c +3f815145CB8XdPUqsmhAjSDFuwOoqA mini-os/mm.c +3f815145vGYx1WY79voKkZB9yKwJKQ mini-os/time.c +3f815145xlKBAQmal9oces3G_Mvxqw mini-os/traps.c +3f815145AYE58Kpmsj5U7oHDpVDZJA mini-os/vmlinux.lds 3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile 3e6377b24eQqYMsDi9XrFkIgTzZ47A tools/balloon/Makefile 3e6377d6eiFjF1hHIS6JEIOFk62xSA tools/balloon/README diff --git a/mini-os/Makefile b/mini-os/Makefile new file mode 100644 index 0000000000..251ee273ba --- /dev/null +++ b/mini-os/Makefile @@ -0,0 +1,40 @@ + +CC := gcc +LD := ld +# Linker should relocate monitor to this address +MONITOR_BASE := 0xE0100000 +CFLAGS := -fno-builtin -O3 -Wall -Ih/ + +TARGET := image.final + +LOBJS:= lib/malloc.o lib/math.o lib/printf.o lib/string.o +OBJS := entry.o kernel.o traps.o hypervisor.o mm.o events.o time.o ${LOBJS} + +HINTF := h/hypervisor-ifs/hypervisor-if.h +HDRS := h/os.h h/types.h h/hypervisor.h h/mm.h h/events.h h/time.h h/lib.h $(HINTF) + +default: $(TARGET) + +$(TARGET): head.o $(OBJS) + # Image will load at 0xC0000000. First bytes from head.o + #$(LD) -N -Ttext 0xC0000000 head.o $(OBJS) -o image.elf + $(LD) -N -T vmlinux.lds head.o $(OBJS) -o image.elf + # Guest OS header -- first 8 bytes are identifier 'XenoGues'. + echo -e -n 'XenoGues' >$@ + # Guest OS header -- next 4 bytes are load address (0xC0000000). + echo -e -n '\000\000\000\300' >>$@ + # Create a raw bag of bytes from the ELF image. + objcopy -O binary -R .note -R .comment image.elf image.raw + # Guest OS header is immediately followed by raw OS image. + cat image.raw >>$@ + #gzip -f -9 $@ + +clean: + rm -f *.o *~ core image.elf image.raw image.final image.final.gz + +%.o: %.c $(HDRS) Makefile + $(CC) $(CFLAGS) -c $< -o $@ + +%.o: %.S $(HDRS) Makefile + $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ + diff --git a/mini-os/README b/mini-os/README new file mode 100644 index 0000000000..98661a9274 --- /dev/null +++ b/mini-os/README @@ -0,0 +1,35 @@ + Minimal OS + ---------- + +This shows some of the stuff that any guest OS will have to set up. + +This includes: + + * installing a virtual exception table + * handling virtual exceptions + * handling asynchronous events + * enabling/disabling async events + * parsing start_info struct at start-of-day + * registering virtual interrupt handlers (for timer interrupts) + * a simple page and memory allocator + * minimal libc support + +Stuff it doesn't show: + + * modifying page tables + * network code + * block-device code + + +- to build it just type make. + +- copy image.final somewhere where dom0 can access it + +- in dom0 + # xi_create 16000 test + + # xi_build image.final 0 + # xi_start + +this prints out a bunch of stuff and then every 1000 timer interrupts the +system time. diff --git a/mini-os/entry.S b/mini-os/entry.S new file mode 100644 index 0000000000..a5a86543b0 --- /dev/null +++ b/mini-os/entry.S @@ -0,0 +1,348 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Adjusted for XenoLinux use by K A Frasier + * Adjusted for Xen minimal os by R Neugebauer + */ + + +#include + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +IF_MASK = 0x00000200 +NT_MASK = 0x00004000 + +/* Declare a globally-visible label */ +#define ENTRY(X) .globl X ; X : + +/* A Linux hangover. Just ignore it. */ +#define SYMBOL_NAME(X) X + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + .align 4 +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call *%edi + addl $8,%esp + +# These are the tests Linux makes before exiting the OS back to userland. +# At these point preeemption may occur, or signals may get delivered. +ret_to_user_tests: +# cmpl $0,need_resched(%ebx) +# jne reschedule +# cmpl $0,sigpending(%ebx) +# je safesti + jmp safesti + + +ret_from_exception: + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne ret_to_user_tests + RESTORE_ALL + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until weve done all processing. HOWEVER, we must enable events before +# popping the stack frame (cant be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so wed +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne ret_to_user_tests +safesti:btsl $31,4(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + cmpl %eax,(%esi) + jne 14f # process more events if necessary... + RESTORE_ALL +14: btrl %eax,4(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. +critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00 # cmpl %eax,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00,0x00 # btrl $31,4(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. +ENTRY(failsafe_callback) +1: pop %ds +2: pop %es +3: pop %fs +4: pop %gs +5: iret +.section .fixup,"ax"; \ +6: movl $0,(%esp); \ + jmp 1b; \ +7: movl $0,(%esp); \ + jmp 2b; \ +8: movl $0,(%esp); \ + jmp 3b; \ +9: movl $0,(%esp); \ + jmp 4b; \ +10: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,6b; \ + .long 2b,7b; \ + .long 3b,8b; \ + .long 4b,9b; \ + .long 5b,10b; \ +.previous + +ENTRY(coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_error) + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_simd_coprocessor_error) + jmp error_code + +ENTRY(device_not_available) + pushl $-1 # mark this as an int + SAVE_ALL + #call SYMBOL_NAME(math_state_restore) + jmp ret_from_exception + +ENTRY(debug) + pushl $0 + pushl $ SYMBOL_NAME(do_debug) + jmp error_code + +ENTRY(int3) + pushl $0 + pushl $ SYMBOL_NAME(do_int3) + jmp error_code + +ENTRY(overflow) + pushl $0 + pushl $ SYMBOL_NAME(do_overflow) + jmp error_code + +ENTRY(bounds) + pushl $0 + pushl $ SYMBOL_NAME(do_bounds) + jmp error_code + +ENTRY(invalid_op) + pushl $0 + pushl $ SYMBOL_NAME(do_invalid_op) + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) + jmp error_code + +ENTRY(double_fault) + pushl $ SYMBOL_NAME(do_double_fault) + jmp error_code + +ENTRY(invalid_TSS) + pushl $ SYMBOL_NAME(do_invalid_TSS) + jmp error_code + +ENTRY(segment_not_present) + pushl $ SYMBOL_NAME(do_segment_not_present) + jmp error_code + +ENTRY(stack_segment) + pushl $ SYMBOL_NAME(do_stack_segment) + jmp error_code + +ENTRY(general_protection) + pushl $ SYMBOL_NAME(do_general_protection) + jmp error_code + +ENTRY(alignment_check) + pushl $ SYMBOL_NAME(do_alignment_check) + jmp error_code + +# This handler is special, because it gets an extra value on its stack, +# which is the linear faulting address. +ENTRY(page_fault) + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the faulting address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %edi # push the faulting address + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call SYMBOL_NAME(do_page_fault) + addl $12,%esp + jmp ret_from_exception + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code diff --git a/mini-os/events.c b/mini-os/events.c new file mode 100644 index 0000000000..a2083afa37 --- /dev/null +++ b/mini-os/events.c @@ -0,0 +1,106 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: events.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Jul 2003 + * + * Environment: Xen Minimal OS + * Description: Deal with events + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include +#include +#include +#include + +static ev_action_t ev_actions[NR_EVS]; +void default_handler(int ev, struct pt_regs *regs); + + +/* + * demux events to different handlers + */ +asmlinkage unsigned int do_event(int ev, struct pt_regs *regs) +{ + ev_action_t *action; + + if (ev >= NR_EVS) { + printk("Large event number %d\n", ev); + return 0; + } + + action = &ev_actions[ev]; + action->count++; + ack_hypervisor_event(ev); + + if (!action->handler) + goto out; + + if (action->status & EVS_DISABLED) + goto out; + + /* call the handler */ + action->handler(ev, regs); + + out: + return 1; + +} + +/* + * add a handler + */ +unsigned int add_ev_action( int ev, void (*handler)(int, struct pt_regs *) ) +{ + if (ev_actions[ev].handler) { + printk ("event[%d] already handled by %p", ev, ev_actions[ev].handler); + return 0; + } + + ev_actions[ev].handler = handler; + return 1; +} + +unsigned int enable_ev_action( int ev ) +{ + if (!ev_actions[ev].handler) { + printk ("enable event[%d], no handler installed", ev); + return 0; + } + ev_actions[ev].status &= ~EVS_DISABLED; + return 1; +} + +unsigned int disable_ev_action( int ev ) +{ + ev_actions[ev].status |= EVS_DISABLED; + return 1; +} + +/* + * initially all events are without a handler and disabled + */ +void init_events(void) +{ + int i; + + /* inintialise event handler */ + for ( i = 0; i < NR_EVS; i++ ) + { + ev_actions[i].status = EVS_DISABLED; + ev_actions[i].handler = NULL; + } +} + +void default_handler(int ev, struct pt_regs *regs) { + printk("X[%d] ", ev); +} diff --git a/mini-os/h/events.h b/mini-os/h/events.h new file mode 100644 index 0000000000..5166876b76 --- /dev/null +++ b/mini-os/h/events.h @@ -0,0 +1,53 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: events.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Jul 2003 + * + * Environment: Xen Minimal OS + * Description: deal with events + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _EVENTS_H_ +#define _EVENTS_H_ + +/* _EVENT_* are defined in hypervisor-if.h */ +#define EV_BLKDEV _EVENT_BLKDEV +#define EV_TIMER _EVENT_TIMER +#define EV_DIE _EVENT_DIE +#define EV_DEBUG _EVENT_DEBUG +#define EV_NET _EVENT_NET +#define EV_PS2 _EVENT_PS2 + +#define NR_EVS (sizeof(HYPERVISOR_shared_info->events) * 8) + +/* ev handler status */ +#define EVS_INPROGRESS 1 /* Event handler active - do not enter! */ +#define EVS_DISABLED 2 /* Event disabled - do not enter! */ +#define EVS_PENDING 4 /* Event pending - replay on enable */ +#define EVS_REPLAY 8 /* Event has been replayed but not acked yet */ + +/* this represents a event handler. Chaining or sharing is not allowed */ +typedef struct _ev_action_t { + void (*handler)(int, struct pt_regs *); + unsigned int status; /* IRQ status */ + u32 count; +} ev_action_t; + +/* prototypes */ +unsigned int do_event(int ev, struct pt_regs *regs); +unsigned int add_ev_action( int ev, void (*handler)(int, struct pt_regs *) ); +unsigned int enable_ev_action( int ev ); +unsigned int disable_ev_action( int ev ); +void init_events(void); + +#endif /* _EVENTS_H_ */ diff --git a/mini-os/h/hypervisor.h b/mini-os/h/hypervisor.h new file mode 100644 index 0000000000..c0d340f339 --- /dev/null +++ b/mini-os/h/hypervisor.h @@ -0,0 +1,266 @@ + +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Adjusted by R Neugebauer for Xen minimal OS + * + * Copyright (c) 2002, K A Fraser + */ + +#ifndef _HYPERVISOR_H_ +#define _HYPERVISOR_H_ + +#include + +/* include the hypervisor interface */ +#include +#include +#include + + +/* + * a placeholder for the start of day information passed up from the hypervisor + */ +union start_info_union +{ + start_info_t start_info; + char padding[512]; +}; +extern union start_info_union start_info_union; +#define start_info (start_info_union.start_info) + + +/* hypervisor.c */ +void do_hypervisor_callback(struct pt_regs *regs); +void enable_hypervisor_event(unsigned int ev); +void disable_hypervisor_event(unsigned int ev); +void ack_hypervisor_event(unsigned int ev); + +/* + * Assembler stubs for hyper-calls. + */ + +static inline int HYPERVISOR_set_trap_table(trap_info_t *table) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), + "b" (table) ); + + return ret; +} + +static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_pt_update), + "b" (req), "c" (count) ); + + return ret; +} + +static inline int HYPERVISOR_console_write(const char *str, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_console_write), + "b" (str), "c" (count) ); + + + return ret; +} + +static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), + "b" (frame_list), "c" (entries) ); + + + return ret; +} + +static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), + "b" (ss), "c" (esp) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), + "b" (event_selector), "c" (event_address), + "d" (failsafe_selector), "S" (failsafe_address) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_net_update(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_net_update) ); + + return ret; +} + +static inline int HYPERVISOR_fpu_taskswitch(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) ); + + return ret; +} + +static inline int HYPERVISOR_yield(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_yield) ); + + return ret; +} + +static inline int HYPERVISOR_exit(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_exit) ); + + return ret; +} + +static inline int HYPERVISOR_dom0_op(void *dom0_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), + "b" (dom0_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_network_op(void *network_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_network_op), + "b" (network_op) ); + + return ret; +} + +static inline int HYPERVISOR_block_io_op(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_block_io_op) ); + + return ret; +} + +static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), + "b" (reg), "c" (value) ); + + return ret; +} + +static inline unsigned long HYPERVISOR_get_debugreg(int reg) +{ + unsigned long ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), + "b" (reg) ); + + return ret; +} + +static inline int HYPERVISOR_update_descriptor( + unsigned long pa, unsigned long word1, unsigned long word2) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), + "b" (pa), "c" (word1), "d" (word2) ); + + return ret; +} + +static inline int HYPERVISOR_set_fast_trap(int idx) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap), + "b" (idx) ); + + return ret; +} + +static inline int HYPERVISOR_dom_mem_op(void *dom_mem_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op), + "b" (dom_mem_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_multicall), + "b" (call_list), "c" (nr_calls) : "memory" ); + + return ret; +} + +static inline long HYPERVISOR_kbd_op(unsigned char op, unsigned char val) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_kbd_op), + "b" (op), "c" (val) ); + + return ret; +} + +#endif /* __HYPERVISOR_H__ */ diff --git a/mini-os/h/lib.h b/mini-os/h/lib.h new file mode 100644 index 0000000000..48140ab79d --- /dev/null +++ b/mini-os/h/lib.h @@ -0,0 +1,129 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: lib.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Random useful library functions, contains some freebsd stuff + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + * + *- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)stdarg.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/i386/include/stdarg.h,v 1.10 1999/08/28 00:44:26 peter Exp $ + */ + +#ifndef _LIB_H_ +#define _LIB_H_ + + +/* variadic function support */ +typedef char *va_list; +#define __va_size(type) \ + (((sizeof(type) + sizeof(int) - 1) / sizeof(int)) * sizeof(int)) +#ifdef __GNUC__ +#define va_start(ap, last) \ + ((ap) = (va_list)__builtin_next_arg(last)) +#else +#define va_start(ap, last) \ + ((ap) = (va_list)&(last) + __va_size(last)) +#endif +#define va_arg(ap, type) \ + (*(type *)((ap) += __va_size(type), (ap) - __va_size(type))) +#define va_end(ap) + + +/* printing */ +#define printk printf +#define kprintf printf +int printf(const char *fmt, ...); +int vprintf(const char *fmt, va_list ap); +int sprintf(char *buf, const char *cfmt, ...); +int vsprintf(char *buf, const char *cfmt, va_list ap); + +/* string and memory manipulation */ +int memcmp(const void *cs, const void *ct, size_t count); +void *memcpy(void *dest, const void *src, size_t count); +int strncmp(const char *cs, const char *ct, size_t count); +int strcmp(const char *cs, const char *ct); +char *strcpy(char *dest, const char *src); +char *strncpy(char *dest, const char *src, size_t count); +void *memset(void *s,int c, size_t count); +size_t strnlen(const char *s, size_t count); +size_t strlen(const char *s); +char *strchr(const char *s, int c); +char *strstr(const char *s1, const char *s2); + + +/* dlmalloc functions */ +struct mallinfo { + int arena; /* non-mmapped space allocated from system */ + int ordblks; /* number of free chunks */ + int smblks; /* number of fastbin blocks */ + int hblks; /* number of mmapped regions */ + int hblkhd; /* space in mmapped regions */ + int usmblks; /* maximum total allocated space */ + int fsmblks; /* space available in freed fastbin blocks */ + int uordblks; /* total allocated space */ + int fordblks; /* total free space */ + int keepcost; /* top-most, releasable (via malloc_trim) space */ +}; + +void *malloc(size_t n); +void *calloc(size_t n_elements, size_t element_size); +void free(void* p); +void *realloc(void* p, size_t n); +void *memalign(size_t alignment, size_t n); +void *valloc(size_t n); +struct mallinfo mallinfo(); +int mallopt(int parameter_number, int parameter_value); + +void **independent_calloc(size_t n_elements, size_t size, void* chunks[]); +void **independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); +void *pvalloc(size_t n); +void cfree(void* p); +int malloc_trim(size_t pad); +size_t malloc_usable_size(void* p); +void malloc_stats(); + + +#endif /* _LIB_H_ */ diff --git a/mini-os/h/list.h b/mini-os/h/list.h new file mode 100644 index 0000000000..eec102405c --- /dev/null +++ b/mini-os/h/list.h @@ -0,0 +1,164 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +#define ASSERT(x) ((void)0) + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline__ void __list_add(struct list_head * new, + struct list_head * prev, + struct list_head * next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static __inline__ void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static __inline__ void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline__ void __list_del(struct list_head * prev, + struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is in an undefined state. + */ +static __inline__ void list_del(struct list_head *entry) +{ + ASSERT(entry->next->prev == entry); + ASSERT(entry->prev->next == entry); + __list_del(entry->prev, entry->next); +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static __inline__ void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static __inline__ int list_empty(struct list_head *head) +{ + return head->next == head; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static __inline__ void list_splice(struct list_head *list, struct list_head *head) +{ + struct list_head *first = list->next; + + if (first != list) { + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +#endif diff --git a/mini-os/h/mm.h b/mini-os/h/mm.h new file mode 100644 index 0000000000..9cc2271f91 --- /dev/null +++ b/mini-os/h/mm.h @@ -0,0 +1,107 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: mm.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: + * Description: + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _MM_H_ +#define _MM_H_ + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + + +extern unsigned long *phys_to_machine_mapping; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static inline unsigned long phys_to_machine(unsigned long phys) +{ + unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} +static inline unsigned long machine_to_phys(unsigned long machine) +{ + unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(_a) (phys_to_machine(__pa(_a))) +#define machine_to_virt(_m) (__va(machine_to_phys(_m))) + +/* + * This handles the memory map.. We could make this a config + * option, but too many people screw it up, and too few need + * it. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ + +#define __PAGE_OFFSET (0xC0000000) + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + + +/* prototypes */ +void init_mm(); +void release_bytes_to_allocator(unsigned long min, unsigned long max); +unsigned long __get_free_pages(int order); +void __free_pages(unsigned long p, int order); +#define get_free_pages(_o) (__get_free_pages(_o)) +#define get_free_page() (__get_free_pages(0)) +#define free_pages(_p,_o) (__free_pages(_p,_o)) +#define free_page(_p) (__free_pages(_p,0)) + +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + + +#endif /* _MM_H_ */ diff --git a/mini-os/h/os.h b/mini-os/h/os.h new file mode 100644 index 0000000000..2645bea954 --- /dev/null +++ b/mini-os/h/os.h @@ -0,0 +1,270 @@ +/****************************************************************************** + * os.h + * + * random collection of macros and definition + */ + +#ifndef _OS_H_ +#define _OS_H_ + + +#define NULL 0 + +/* + * These are the segment descriptors provided for us by the hypervisor. + * For now, these are hardwired -- guest OSes cannot update the GDT + * or LDT. + * + * It shouldn't be hard to support descriptor-table frobbing -- let me + * know if the BSD or XP ports require flexibility here. + */ + + +/* + * these are also defined in hypervisor-if.h but can't be pulled in as + * they are used in start of day assembly. Need to clean up the .h files + * a bit more... + */ + +#ifndef FLAT_RING1_CS +#define FLAT_RING1_CS 0x0819 +#define FLAT_RING1_DS 0x0821 +#define FLAT_RING3_CS 0x082b +#define FLAT_RING3_DS 0x0833 +#endif + +#define __KERNEL_CS FLAT_RING1_CS +#define __KERNEL_DS FLAT_RING1_DS + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +#include +#include + + +/* this struct defines the way the registers are stored on the + stack during an exception or interrupt. */ +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + + +/* + * STI/CLI equivalents. These basically set and clear the virtual + * event_enable flag in teh shared_info structure. Note that when + * the enable bit is set, there may be pending events to be handled. + * We may therefore call into do_hypervisor_callback() directly. + */ +#define unlikely(x) __builtin_expect((x),0) +#define __save_flags(x) \ +do { \ + (x) = test_bit(EVENTS_MASTER_ENABLE_BIT, \ + &HYPERVISOR_shared_info->events_mask); \ + barrier(); \ +} while (0) + +#define __restore_flags(x) \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + if (x) set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \ + barrier(); \ + if ( unlikely(_shared->events) && (x) ) do_hypervisor_callback(NULL); \ +} while (0) + +#define __cli() \ +do { \ + clear_bit(EVENTS_MASTER_ENABLE_BIT, &HYPERVISOR_shared_info->events_mask);\ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \ + barrier(); \ + if ( unlikely(_shared->events) ) do_hypervisor_callback(NULL); \ +} while (0) +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) +#define save_and_cli(x) __save_and_cli(x) +#define save_and_sti(x) __save_and_sti(x) + + + +/* This is a barrier for the compiler only, NOT the processor! */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#define LOCK_PREFIX "" +#define LOCK "" +#define ADDR (*(volatile long *) addr) +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + + +/* + * This XCHG macro is straight from Linux. It is gross. + */ +#define xchg(ptr,v) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, + int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + + +/* useful hypervisor macros */ + +struct desc_struct { + unsigned long a,b; +}; +extern struct desc_struct default_ldt[]; + +#define asmlinkage __attribute__((regparm(0))) + +/* + * some random linux macros + */ + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + +#endif /* !__ASSEMBLY__ */ + +#endif /* _OS_H_ */ diff --git a/mini-os/h/time.h b/mini-os/h/time.h new file mode 100644 index 0000000000..b136f4b0c3 --- /dev/null +++ b/mini-os/h/time.h @@ -0,0 +1,59 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Jul 2003 + * + * Environment: Xen Minimal OS + * Description: Time and timer functions + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _TIME_H_ +#define _TIME_H_ + +#include + +/* + * System Time + * 64 bit value containing the nanoseconds elapsed since boot time. + * This value is adjusted by frequency drift. + * NOW() returns the current time. + * The other macros are for convenience to approximate short intervals + * of real time into system time + */ +typedef s64 s_time_t; +#define NOW() ((s_time_t)get_s_time()) +#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) +#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) +#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) +#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL ) +#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) +#define Time_Max ((s_time_t) 0x7fffffffffffffffLL) +#define FOREVER Time_Max + + +/* wall clock time */ +typedef long time_t; +typedef long suseconds_t; +struct timeval { + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* microseconds */ +}; + + +/* prototypes */ +void init_time(void); +s_time_t get_s_time(void); +s_time_t get_v_time(void); +void gettimeofday(struct timeval *tv); + +#endif /* _TIME_H_ */ diff --git a/mini-os/h/types.h b/mini-os/h/types.h new file mode 100644 index 0000000000..4f87a74e99 --- /dev/null +++ b/mini-os/h/types.h @@ -0,0 +1,41 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: types.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: May 2003 + * + * Environment: Xeno Minimal OS + * Description: a random collection of type definitions + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _TYPES_H_ +#define _TYPES_H_ + +typedef signed char s8; +typedef unsigned char u8; +typedef signed short s16; +typedef unsigned short u16; +typedef signed int s32; +typedef unsigned int u32; +typedef signed long long s64; +typedef unsigned long long u64; + +typedef unsigned int size_t; + +/* FreeBSD compat types */ +typedef unsigned char u_char; +typedef unsigned int u_int; +typedef unsigned long u_long; +typedef long long quad_t; +typedef unsigned long long u_quad_t; +typedef unsigned int uintptr_t; +#endif /* _TYPES_H_ */ diff --git a/mini-os/head.S b/mini-os/head.S new file mode 100644 index 0000000000..3f4e6670c3 --- /dev/null +++ b/mini-os/head.S @@ -0,0 +1,46 @@ +#include + +/* Offsets in start_info structure */ +#define SHARED_INFO 4 +#define MOD_START 12 +#define MOD_LEN 16 + +#define ENTRY(X) .globl X ; X : + +.globl _start +_start: + cld + + lss stack_start,%esp + + /* Copy any module somewhere safe before it's clobbered by BSS. */ + mov MOD_LEN(%esi),%ecx + shr $2,%ecx + jz 2f /* bail from copy loop if no module */ + + mov $_end,%edi + add MOD_LEN(%esi),%edi + mov MOD_START(%esi),%eax + add MOD_LEN(%esi),%eax +1: sub $4,%eax + sub $4,%edi + mov (%eax),%ebx + mov %ebx,(%edi) + loop 1b + mov %edi,MOD_START(%esi) + + /* Clear BSS first so that there are no surprises... */ +2: xorl %eax,%eax + movl $__bss_start,%edi + movl $_end,%ecx + subl %edi,%ecx + rep stosb + + push %esi + call start_kernel + + +stack_start: + .long stack+8192, __KERNEL_DS + + diff --git a/mini-os/hypervisor.c b/mini-os/hypervisor.c new file mode 100644 index 0000000000..cf3349426e --- /dev/null +++ b/mini-os/hypervisor.c @@ -0,0 +1,83 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copied from XenoLinux and adjusted by Rolf.Neugebauer@intel.com + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include + +static unsigned long event_mask = 0; +static unsigned long ev_err_count; + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long events, flags; + shared_info_t *shared = HYPERVISOR_shared_info; + + do { + /* Specialised local_irq_save(). */ + flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, + &shared->events_mask); + barrier(); + + events = xchg(&shared->events, 0); + events &= event_mask; + + /* 'events' now contains some pending events to handle. */ + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 2f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " mov %%eax,(%%esp) ;" + " call do_event ;" /* do_event(event) */ + "2: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (events), "r" (regs) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* Specialised local_irq_restore(). */ + if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask); + barrier(); + } + while ( shared->events ); +} + + + +/* + * Define interface to generic handling in irq.c + */ + +void enable_hypervisor_event(unsigned int ev) +{ + set_bit(ev, &event_mask); + set_bit(ev, &HYPERVISOR_shared_info->events_mask); + if ( test_bit(EVENTS_MASTER_ENABLE_BIT, + &HYPERVISOR_shared_info->events_mask) ) + do_hypervisor_callback(NULL); +} + +void disable_hypervisor_event(unsigned int ev) +{ + clear_bit(ev, &event_mask); + clear_bit(ev, &HYPERVISOR_shared_info->events_mask); +} + +void ack_hypervisor_event(unsigned int ev) +{ + if ( !(event_mask & (1<events_mask); +} diff --git a/mini-os/kernel.c b/mini-os/kernel.c new file mode 100644 index 0000000000..7c37604745 --- /dev/null +++ b/mini-os/kernel.c @@ -0,0 +1,115 @@ +/****************************************************************************** + * kernel.c + * + * Assorted crap goes here, including the initial C entry point, jumped at + * from head.S. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Shared page for communicating with the hypervisor. + * Events flags go here, for example. + */ +shared_info_t *HYPERVISOR_shared_info; + +/* + * This structure contains start-of-day info, such as pagetable base pointer, + * address of the shared_info structure, and things like that. + */ +union start_info_union start_info_union; + +/* + * Just allocate the kernel stack here. SS:ESP is set up to point here + * in head.S. + */ +char stack[8192]; + + +/* Assembler interface fns in entry.S. */ +void hypervisor_callback(void); +void failsafe_callback(void); + +/* default exit event handler */ +static void exit_handler(int ev, struct pt_regs *regs); + +/* + * INITIAL C ENTRY POINT. + */ +void start_kernel(start_info_t *si) +{ + int i; + + /* Copy the start_info struct to a globally-accessible area. */ + memcpy(&start_info, si, sizeof(*si)); + + /* Grab the shared_info pointer and put it in a safe place. */ + HYPERVISOR_shared_info = start_info.shared_info; + + /* Set up event and failsafe callback addresses. */ + HYPERVISOR_set_callbacks( + __KERNEL_CS, (unsigned long)hypervisor_callback, + __KERNEL_CS, (unsigned long)failsafe_callback); + + + /* ENABLE EVENT DELIVERY. This is disabled at start of day. */ + __sti(); + + /* print out some useful information */ + printk("Xeno Minimal OS!\n"); + printk("start_info: %p\n", si); + printk(" nr_pages: %lu", si->nr_pages); + printk(" shared_inf: %p\n", si->shared_info); + printk(" pt_base: %p", (void *)si->pt_base); + printk(" mod_start: 0x%lx\n", si->mod_start); + printk(" mod_len: %lu\n", si->mod_len); + printk(" net_rings: "); + for (i = 0; i < MAX_DOMAIN_VIFS; i++) { + printk(" %lx", si->net_rings[i]); + }; printk("\n"); + printk(" blk_ring: 0x%lx\n", si->blk_ring); + printk(" dom_id: %d\n", si->dom_id); + printk(" flags: 0x%lx\n", si->flags); + printk(" cmd_line: %s\n", si->cmd_line ? (const char *)si->cmd_line : "NULL"); + + /* init memory management */ + init_mm(); + + /* set up events */ + init_events(); + + /* install some handlers */ + add_ev_action(EV_DIE, &exit_handler); + enable_ev_action(EV_DIE); + enable_hypervisor_event(EV_DIE); + + /* init time and timers */ + init_time(); + + /* do nothing */ + for ( ; ; ) HYPERVISOR_yield(); +} + + +/* + * do_exit: This is called whenever an IRET fails in entry.S. + * This will generally be because an application has got itself into + * a really bad state (probably a bad CS or SS). It must be killed. + * Of course, minimal OS doesn't have applications :-) + */ + +void do_exit(void) +{ + printk("do_exit called!\n"); + for ( ;; ) ; +} +static void exit_handler(int ev, struct pt_regs *regs) { + do_exit(); +} + diff --git a/mini-os/lib/malloc.c b/mini-os/lib/malloc.c new file mode 100644 index 0000000000..003c086858 --- /dev/null +++ b/mini-os/lib/malloc.c @@ -0,0 +1,5700 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: malloc.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library functions, maloc at al + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include +#include +#include +#include + +/* standard compile option */ +#define HAVE_MEMCOPY 1 +#define USE_MEMCPY 1 +#undef HAVE_MMAP +#undef MMAP_CLEARS +#undef HAVE_MREMAP +#define malloc_getpagesize PAGE_SIZE +#undef HAVE_USR_INCLUDE_MALLOC_H +#define LACKS_UNISTD_H 1 +#define LACKS_SYS_PARAM_H 1 +#define LACKS_SYS_MMAN_H 1 +#define LACKS_FCNTL_H 1 + + +/* page allocator interface */ +#define MORECORE more_core +#define MORECORE_CONTIGUOUS 0 +#define MORECORE_FAILURE 0 +#define MORECORE_CANNOT_TRIM 1 + +static void *more_core(size_t n) +{ + static void *last; + unsigned long order, num_pages; + void *ret; + + if (n == 0) + return last; + + /* get pages */ + order = get_order(n); + ret = (void *)get_free_pages(order); + + /* work out pointer to end of chunk */ + if (ret) { + num_pages = 1 << order; + last = ret + (num_pages * PAGE_SIZE); + } + + //printk("malloc(%lu) -> o=%lu r=%p, l=%p", n, order, ret, last); + + return ret; +} + +/* other options commented out below */ +#define __STD_C 1 +#define Void_t void +#define assert(x) ((void)0) + +#define CHUNK_SIZE_T unsigned long +#define PTR_UINT unsigned long +#define INTERNAL_SIZE_T size_t +#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) +#define MALLOC_ALIGNMENT (2 * SIZE_SZ) +#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) +#define TRIM_FASTBINS 0 + +#define M_MXFAST 1 +#define DEFAULT_MXFAST 64 +#define M_TRIM_THRESHOLD -1 +#define DEFAULT_TRIM_THRESHOLD (256 * 1024) +#define M_TOP_PAD -2 +#define DEFAULT_TOP_PAD (0) +#define M_MMAP_THRESHOLD -3 +#define DEFAULT_MMAP_THRESHOLD (256 * 1024) +#define M_MMAP_MAX -4 +#define DEFAULT_MMAP_MAX (0) +#define MALLOC_FAILURE_ACTION printf("malloc failure\n") + +#define cALLOc public_cALLOc +#define fREe public_fREe +#define cFREe public_cFREe +#define mALLOc public_mALLOc +#define mEMALIGn public_mEMALIGn +#define rEALLOc public_rEALLOc +#define vALLOc public_vALLOc +#define pVALLOc public_pVALLOc +#define mALLINFo public_mALLINFo +#define mALLOPt public_mALLOPt +#define mTRIm public_mTRIm +#define mSTATs public_mSTATs +#define mUSABLe public_mUSABLe +#define iCALLOc public_iCALLOc +#define iCOMALLOc public_iCOMALLOc + +#define public_cALLOc calloc +#define public_fREe free +#define public_cFREe cfree +#define public_mALLOc malloc +#define public_mEMALIGn memalign +#define public_rEALLOc realloc +#define public_vALLOc valloc +#define public_pVALLOc pvalloc +#define public_mALLINFo mallinfo +#define public_mALLOPt mallopt +#define public_mTRIm malloc_trim +#define public_mSTATs malloc_stats +#define public_mUSABLe malloc_usable_size +#define public_iCALLOc independent_calloc +#define public_iCOMALLOc independent_comalloc + + +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain. Use, modify, and + redistribute this code without permission or acknowledgement in any + way you wish. Send questions, comments, complaints, performance + data, etc to dl@cs.oswego.edu + +* VERSION 2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O), and link it into another program. All + of the compile-time options default to reasonable values for use on + most unix platforms. Compile -DWIN32 for reasonable defaults on windows. + You might later want to step through various compile-time and dynamic + tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. + +* Why use this malloc? + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and tunable. + Consistent balance across these factors results in a good general-purpose + allocator for malloc-intensive programs. + + The main properties of the algorithms are: + * For large (>= 512 bytes) requests, it is a pure best-fit allocator, + with ties normally decided via FIFO (i.e. least recently used). + * For small (<= 64 bytes by default) requests, it is a caching + allocator, that maintains pools of quickly recycled chunks. + * In between, and for combinations of large and small requests, it does + the best it can trying to meet both goals at once. + * For very large requests (>= 128KB by default), it relies on system + memory mapping facilities, if supported. + + For a longer but slightly out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + + You may already by default be using a C library containing a malloc + that is based on some version of this malloc (for example in + linux). You might still want to use the one in this file in order to + customize settings or to avoid overheads associated with library + versions. + +* Contents, described in more detail in "description of public routines" below. + + Standard (ANSI/SVID/...) functions: + malloc(size_t n); + calloc(size_t n_elements, size_t element_size); + free(Void_t* p); + realloc(Void_t* p, size_t n); + memalign(size_t alignment, size_t n); + valloc(size_t n); + mallinfo() + mallopt(int parameter_number, int parameter_value) + + Additional functions: + independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); + independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); + pvalloc(size_t n); + cfree(Void_t* p); + malloc_trim(size_t pad); + malloc_usable_size(Void_t* p); + malloc_stats(); + +* Vital statistics: + + Supported pointer representation: 4 or 8 bytes + Supported size_t representation: 4 or 8 bytes + Note that size_t is allowed to be 4 bytes even if pointers are 8. + You can adjust this by defining INTERNAL_SIZE_T + + Alignment: 2 * sizeof(size_t) (default) + (i.e., 8 byte alignment with 4byte size_t). This suffices for + nearly all current machines and C compilers. However, you can + define MALLOC_ALIGNMENT to be wider than this if necessary. + + Minimum overhead per allocated chunk: 4 or 8 bytes + Each malloced chunk has a hidden word of overhead holding size + and status information. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) + 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) + + When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte + ptrs but 4 byte size) or 24 (for 8/8) additional bytes are + needed; 4 (8) for a trailing size field and 8 (16) bytes for + free list pointers. Thus, the minimum allocatable size is + 16/24/32 bytes. + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is 2 * + sizeof(size_t) bytes plus the remainder from a system page (the + minimal mmap unit); typically 4096 or 8192 bytes. + + Maximum allocated size: 4-byte size_t: 2^32 minus about two pages + 8-byte size_t: 2^64 minus about two pages + + It is assumed that (possibly signed) size_t values suffice to + represent chunk sizes. `Possibly signed' is due to the fact + that `size_t' may be defined on a system as either a signed or + an unsigned type. The ISO C standard says that it must be + unsigned, but a few systems are known not to adhere to this. + Additionally, even when size_t is unsigned, sbrk (which is by + default used to obtain memory from system) accepts signed + arguments, and may not be able to handle size_t-wide arguments + with negative sign bit. Generally, values that would + appear as negative after accounting for overhead and alignment + are supported only via mmap(), which does not have this + limitation. + + Requests for sizes outside the allowed range will perform an optional + failure action and then return null. (Requests may also + also fail because a system is out of memory.) + + Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined + + When USE_MALLOC_LOCK is defined, wrappers are created to + surround every public call with either a pthread mutex or + a win32 spinlock (depending on WIN32). This is not + especially fast, and can be a major bottleneck. + It is designed only to provide minimal protection + in concurrent environments, and to provide a basis for + extensions. If you are using malloc in a concurrent program, + you would be far better off obtaining ptmalloc, which is + derived from a version of this malloc, and is well-tuned for + concurrent programs. (See http://www.malloc.de) Note that + even when USE_MALLOC_LOCK is defined, you can can guarantee + full thread-safety only if no threads acquire memory through + direct calls to MORECORE or other system-level allocators. + + Compliance: I believe it is compliant with the 1997 Single Unix Specification + (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Synopsis of compile-time options: + + People have reported using previous versions of this malloc on all + versions of Unix, sometimes by tweaking some of the defines + below. It has been tested most extensively on Solaris and + Linux. It is also reported to work on WIN32 platforms. + People also report using it in stand-alone embedded systems. + + The implementation is in straight, hand-tuned ANSI C. It is not + at all modular. (Sorry!) It uses a lot of macros. To be at all + usable, this code should be compiled using an optimizing compiler + (for example gcc -O3) that can simplify expressions and control + paths. (FAQ: some macros import variables as arguments rather than + declare locals because people reported that some debuggers + otherwise get confused.) + + OPTION DEFAULT VALUE + + Compilation Environment options: + + __STD_C derived from C compiler defines + WIN32 NOT defined + HAVE_MEMCPY defined + USE_MEMCPY 1 if HAVE_MEMCPY is defined + HAVE_MMAP defined as 1 + MMAP_CLEARS 1 + HAVE_MREMAP 0 unless linux defined + malloc_getpagesize derived from system #includes, or 4096 if not + HAVE_USR_INCLUDE_MALLOC_H NOT defined + LACKS_UNISTD_H NOT defined unless WIN32 + LACKS_SYS_PARAM_H NOT defined unless WIN32 + LACKS_SYS_MMAN_H NOT defined unless WIN32 + LACKS_FCNTL_H NOT defined + + Changing default word sizes: + + INTERNAL_SIZE_T size_t + MALLOC_ALIGNMENT 2 * sizeof(INTERNAL_SIZE_T) + PTR_UINT unsigned long + CHUNK_SIZE_T unsigned long + + Configuration and functionality options: + + USE_DL_PREFIX NOT defined + USE_PUBLIC_MALLOC_WRAPPERS NOT defined + USE_MALLOC_LOCK NOT defined + DEBUG NOT defined + REALLOC_ZERO_BYTES_FREES NOT defined + MALLOC_FAILURE_ACTION errno = ENOMEM, if __STD_C defined, else no-op + TRIM_FASTBINS 0 + FIRST_SORTED_BIN_SIZE 512 + + Options for customizing MORECORE: + + MORECORE sbrk + MORECORE_CONTIGUOUS 1 + MORECORE_CANNOT_TRIM NOT defined + MMAP_AS_MORECORE_SIZE (1024 * 1024) + + Tuning options that are also dynamically changeable via mallopt: + + DEFAULT_MXFAST 64 + DEFAULT_TRIM_THRESHOLD 256 * 1024 + DEFAULT_TOP_PAD 0 + DEFAULT_MMAP_THRESHOLD 256 * 1024 + DEFAULT_MMAP_MAX 65536 + + There are several other #defined constants and macros that you + probably don't want to touch unless you are extending or adapting malloc. +*/ + +/* RN: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ +#if 0 + +/* + WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. +*/ + +/* #define WIN32 */ + +#ifdef WIN32 + +#define WIN32_LEAN_AND_MEAN +#include + +/* Win32 doesn't supply or need the following headers */ +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H + +/* Use the supplied emulation of sbrk */ +#define MORECORE sbrk +#define MORECORE_CONTIGUOUS 1 +#define MORECORE_FAILURE ((void*)(-1)) + +/* Use the supplied emulation of mmap and munmap */ +#define HAVE_MMAP 1 +#define MUNMAP_FAILURE (-1) +#define MMAP_CLEARS 1 + +/* These values don't really matter in windows mmap emulation */ +#define MAP_PRIVATE 1 +#define MAP_ANONYMOUS 2 +#define PROT_READ 1 +#define PROT_WRITE 2 + +/* Emulation functions defined at the end of this file */ + +/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */ +#ifdef USE_MALLOC_LOCK +static int slwait(int *sl); +static int slrelease(int *sl); +#endif + +static long getpagesize(void); +static long getregionsize(void); +static void *sbrk(long size); +static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg); +static long munmap(void *ptr, long size); + +static void vminfo (unsigned long*free, unsigned long*reserved, unsigned long*committed); +static int cpuinfo (int whole, unsigned long*kernel, unsigned long*user); + +#endif + +/* + __STD_C should be nonzero if using ANSI-standard C compiler, a C++ + compiler, or a C compiler sufficiently close to ANSI to get away + with it. +*/ + +#ifndef __STD_C +#if defined(__STDC__) || defined(_cplusplus) +#define __STD_C 1 +#else +#define __STD_C 0 +#endif +#endif /*__STD_C*/ + + +/* + Void_t* is the pointer type that malloc should say it returns +*/ + +#ifndef Void_t +#if (__STD_C || defined(WIN32)) +#define Void_t void +#else +#define Void_t char +#endif +#endif /*Void_t*/ + +#if __STD_C +#include /* for size_t */ +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* define LACKS_UNISTD_H if your system does not have a . */ + +/* #define LACKS_UNISTD_H */ + +#ifndef LACKS_UNISTD_H +#include +#endif + +/* define LACKS_SYS_PARAM_H if your system does not have a . */ + +/* #define LACKS_SYS_PARAM_H */ + + +#include /* needed for malloc_stats */ +#include /* needed for optional MALLOC_FAILURE_ACTION */ + + +/* + Debugging: + + Because freed chunks may be overwritten with bookkeeping fields, this + malloc will often die when freed memory is overwritten by user + programs. This can be very effective (albeit in an annoying way) + in helping track down dangling pointers. + + If you compile with -DDEBUG, a number of assertion checks are + enabled that will catch more memory errors. You probably won't be + able to make much sense of the actual assertion errors, but they + should help you locate incorrectly overwritten memory. The + checking is fairly extensive, and will slow down execution + noticeably. Calling malloc_stats or mallinfo with DEBUG set will + attempt to check every non-mmapped allocated and free chunk in the + course of computing the summmaries. (By nature, mmapped regions + cannot be checked very much automatically.) + + Setting DEBUG may also be helpful if you are trying to modify + this code. The assertions in the check routines spell out in more + detail the assumptions and invariants underlying the algorithms. + + Setting DEBUG does NOT provide an automated mechanism for checking + that all accesses to malloced memory stay within their + bounds. However, there are several add-ons and adaptations of this + or other mallocs available that do this. +*/ + +#if DEBUG +#include +#else +#define assert(x) ((void)0) +#endif + +/* + The unsigned integer type used for comparing any two chunk sizes. + This should be at least as wide as size_t, but should not be signed. +*/ + +#ifndef CHUNK_SIZE_T +#define CHUNK_SIZE_T unsigned long +#endif + +/* + The unsigned integer type used to hold addresses when they are are + manipulated as integers. Except that it is not defined on all + systems, intptr_t would suffice. +*/ +#ifndef PTR_UINT +#define PTR_UINT unsigned long +#endif + + +/* + INTERNAL_SIZE_T is the word-size used for internal bookkeeping + of chunk sizes. + + The default version is the same as size_t. + + While not strictly necessary, it is best to define this as an + unsigned type, even if size_t is a signed type. This may avoid some + artificial size limitations on some systems. + + On a 64-bit machine, you may be able to reduce malloc overhead by + defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the + expense of not being able to handle more than 2^32 of malloced + space. If this limitation is acceptable, you are encouraged to set + this unless you are on a platform requiring 16byte alignments. In + this case the alignment requirements turn out to negate any + potential advantages of decreasing size_t word size. + + Implementors: Beware of the possible combinations of: + - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits, + and might be the same width as int or as long + - size_t might have different width and signedness as INTERNAL_SIZE_T + - int and long might be 32 or 64 bits, and might be the same width + To deal with this, most comparisons and difference computations + among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being + aware of the fact that casting an unsigned int to a wider long does + not sign-extend. (This also makes checking for negative numbers + awkward.) Some of these casts result in harmless compiler warnings + on some systems. +*/ + +#ifndef INTERNAL_SIZE_T +#define INTERNAL_SIZE_T size_t +#endif + +/* The corresponding word size */ +#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) + + + +/* + MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. + It must be a power of two at least 2 * SIZE_SZ, even on machines + for which smaller alignments would suffice. It may be defined as + larger than this though. Note however that code and data structures + are optimized for the case of 8-byte alignment. +*/ + + +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT (2 * SIZE_SZ) +#endif + +/* The corresponding bit mask value */ +#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) + + + +/* + REALLOC_ZERO_BYTES_FREES should be set if a call to + realloc with zero bytes should be the same as a call to free. + Some people think it should. Otherwise, since this malloc + returns a unique pointer for malloc(0), so does realloc(p, 0). +*/ + +/* #define REALLOC_ZERO_BYTES_FREES */ + +/* + TRIM_FASTBINS controls whether free() of a very small chunk can + immediately lead to trimming. Setting to true (1) can reduce memory + footprint, but will almost always slow down programs that use a lot + of small chunks. + + Define this only if you are willing to give up some speed to more + aggressively reduce system-level memory footprint when releasing + memory in programs that use many small chunks. You can get + essentially the same effect by setting MXFAST to 0, but this can + lead to even greater slowdowns in programs using many small chunks. + TRIM_FASTBINS is an in-between compile-time option, that disables + only those chunks bordering topmost memory from being placed in + fastbins. +*/ + +#ifndef TRIM_FASTBINS +#define TRIM_FASTBINS 0 +#endif + + +/* + USE_DL_PREFIX will prefix all public routines with the string 'dl'. + This is necessary when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. +*/ + +/* #define USE_DL_PREFIX */ + + +/* + USE_MALLOC_LOCK causes wrapper functions to surround each + callable routine with pthread mutex lock/unlock. + + USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined +*/ + + +/* #define USE_MALLOC_LOCK */ + + +/* + If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is + actually a wrapper function that first calls MALLOC_PREACTION, then + calls the internal routine, and follows it with + MALLOC_POSTACTION. This is needed for locking, but you can also use + this, without USE_MALLOC_LOCK, for purposes of interception, + instrumentation, etc. It is a sad fact that using wrappers often + noticeably degrades performance of malloc-intensive programs. +*/ + +#ifdef USE_MALLOC_LOCK +#define USE_PUBLIC_MALLOC_WRAPPERS +#else +/* #define USE_PUBLIC_MALLOC_WRAPPERS */ +#endif + + +/* + Two-phase name translation. + All of the actual routines are given mangled names. + When wrappers are used, they become the public callable versions. + When DL_PREFIX is used, the callable names are prefixed. +*/ + +#ifndef USE_PUBLIC_MALLOC_WRAPPERS +#define cALLOc public_cALLOc +#define fREe public_fREe +#define cFREe public_cFREe +#define mALLOc public_mALLOc +#define mEMALIGn public_mEMALIGn +#define rEALLOc public_rEALLOc +#define vALLOc public_vALLOc +#define pVALLOc public_pVALLOc +#define mALLINFo public_mALLINFo +#define mALLOPt public_mALLOPt +#define mTRIm public_mTRIm +#define mSTATs public_mSTATs +#define mUSABLe public_mUSABLe +#define iCALLOc public_iCALLOc +#define iCOMALLOc public_iCOMALLOc +#endif + +#ifdef USE_DL_PREFIX +#define public_cALLOc dlcalloc +#define public_fREe dlfree +#define public_cFREe dlcfree +#define public_mALLOc dlmalloc +#define public_mEMALIGn dlmemalign +#define public_rEALLOc dlrealloc +#define public_vALLOc dlvalloc +#define public_pVALLOc dlpvalloc +#define public_mALLINFo dlmallinfo +#define public_mALLOPt dlmallopt +#define public_mTRIm dlmalloc_trim +#define public_mSTATs dlmalloc_stats +#define public_mUSABLe dlmalloc_usable_size +#define public_iCALLOc dlindependent_calloc +#define public_iCOMALLOc dlindependent_comalloc +#else /* USE_DL_PREFIX */ +#define public_cALLOc calloc +#define public_fREe free +#define public_cFREe cfree +#define public_mALLOc malloc +#define public_mEMALIGn memalign +#define public_rEALLOc realloc +#define public_vALLOc valloc +#define public_pVALLOc pvalloc +#define public_mALLINFo mallinfo +#define public_mALLOPt mallopt +#define public_mTRIm malloc_trim +#define public_mSTATs malloc_stats +#define public_mUSABLe malloc_usable_size +#define public_iCALLOc independent_calloc +#define public_iCOMALLOc independent_comalloc +#endif /* USE_DL_PREFIX */ + + +/* + HAVE_MEMCPY should be defined if you are not otherwise using + ANSI STD C, but still have memcpy and memset in your C library + and want to use them in calloc and realloc. Otherwise simple + macro versions are defined below. + + USE_MEMCPY should be defined as 1 if you actually want to + have memset and memcpy called. People report that the macro + versions are faster than libc versions on some systems. + + Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks + (of <= 36 bytes) are manually unrolled in realloc and calloc. +*/ + +#define HAVE_MEMCPY + +#ifndef USE_MEMCPY +#ifdef HAVE_MEMCPY +#define USE_MEMCPY 1 +#else +#define USE_MEMCPY 0 +#endif +#endif + + +#if (__STD_C || defined(HAVE_MEMCPY)) + +#ifdef WIN32 +/* On Win32 memset and memcpy are already declared in windows.h */ +#else +#if __STD_C +void* memset(void*, int, size_t); +void* memcpy(void*, const void*, size_t); +#else +Void_t* memset(); +Void_t* memcpy(); +#endif +#endif +#endif + +/* + MALLOC_FAILURE_ACTION is the action to take before "return 0" when + malloc fails to be able to return memory, either because memory is + exhausted or because of illegal arguments. + + By default, sets errno if running on STD_C platform, else does nothing. +*/ + +#ifndef MALLOC_FAILURE_ACTION +#if __STD_C +#define MALLOC_FAILURE_ACTION \ + errno = ENOMEM; + +#else +#define MALLOC_FAILURE_ACTION +#endif +#endif + +/* + MORECORE-related declarations. By default, rely on sbrk +*/ + + +#ifdef LACKS_UNISTD_H +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +#if __STD_C +extern Void_t* sbrk(ptrdiff_t); +#else +extern Void_t* sbrk(); +#endif +#endif +#endif + +/* + MORECORE is the name of the routine to call to obtain more memory + from the system. See below for general guidance on writing + alternative MORECORE functions, as well as a version for WIN32 and a + sample version for pre-OSX macos. +*/ + +#ifndef MORECORE +#define MORECORE sbrk +#endif + +/* + MORECORE_FAILURE is the value returned upon failure of MORECORE + as well as mmap. Since it cannot be an otherwise valid memory address, + and must reflect values of standard sys calls, you probably ought not + try to redefine it. +*/ + +#ifndef MORECORE_FAILURE +#define MORECORE_FAILURE (-1) +#endif + +/* + If MORECORE_CONTIGUOUS is true, take advantage of fact that + consecutive calls to MORECORE with positive arguments always return + contiguous increasing addresses. This is true of unix sbrk. Even + if not defined, when regions happen to be contiguous, malloc will + permit allocations spanning regions obtained from different + calls. But defining this when applicable enables some stronger + consistency checks and space efficiencies. +*/ + +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif + +/* + Define MORECORE_CANNOT_TRIM if your version of MORECORE + cannot release space back to the system when given negative + arguments. This is generally necessary only if you are using + a hand-crafted MORECORE function that cannot handle negative arguments. +*/ + +/* #define MORECORE_CANNOT_TRIM */ + + +/* + Define HAVE_MMAP as true to optionally make malloc() use mmap() to + allocate very large blocks. These will be returned to the + operating system immediately after a free(). Also, if mmap + is available, it is used as a backup strategy in cases where + MORECORE fails to provide space from system. + + This malloc is best tuned to work with mmap for large requests. + If you do not have mmap, operations involving very large chunks (1MB + or so) may be slower than you'd like. +*/ + +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif + +#if HAVE_MMAP +/* + Standard unix mmap using /dev/zero clears memory so calloc doesn't + need to. +*/ + +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif + +#else /* no mmap */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 0 +#endif +#endif + + +/* + MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if + sbrk fails, and mmap is used as a backup (which is done only if + HAVE_MMAP). The value must be a multiple of page size. This + backup strategy generally applies only when systems have "holes" in + address space, so sbrk cannot perform contiguous expansion, but + there is still space available on system. On systems for which + this is known to be useful (i.e. most linux kernels), this occurs + only when programs allocate huge amounts of memory. Between this, + and the fact that mmap regions tend to be limited, the size should + be large, to avoid too many mmap calls and thus avoid running out + of kernel resources. +*/ + +#ifndef MMAP_AS_MORECORE_SIZE +#define MMAP_AS_MORECORE_SIZE (1024 * 1024) +#endif + +/* + Define HAVE_MREMAP to make realloc() use mremap() to re-allocate + large blocks. This is currently only possible on Linux with + kernel versions newer than 1.3.77. +*/ + +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#else +#define HAVE_MREMAP 0 +#endif + +#endif /* HAVE_MMAP */ + + +/* + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. Note that this value is + cached during initialization into a field of malloc_state. So even + if malloc_getpagesize is a function, it is only called once. + + The following mechanics for getpagesize were adapted from bsd/gnu + getpagesize.h. If none of the system-probes here apply, a value of + 4096 is used, which should be OK: If they don't apply, then using + the actual value probably doesn't impact performance. +*/ + + +#ifndef malloc_getpagesize + +#ifndef LACKS_UNISTD_H +# include +#endif + +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif + +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize (4096) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif + +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any SVID/XPG compliant system that has + a /usr/include/malloc.h defining struct mallinfo. (If you'd like to + install such a thing yourself, cut out the preliminary declarations + as described above and below and save them in a malloc.h file. But + there's no compelling reason to bother to do this.) + + The main declaration needed is the mallinfo struct that is returned + (by-copy) by mallinfo(). The SVID/XPG malloinfo struct contains a + bunch of fields that are not even meaningful in this version of + malloc. These fields are are instead filled by mallinfo() with + other numbers that might be of interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else an SVID2/XPG2 compliant + version is declared below. These must be precisely the same for + mallinfo() to work. The original SVID version of this struct, + defined on most systems with mallinfo, declares all fields as + ints. But some others define as unsigned long. If your system + defines the fields using a type of different width than listed here, + you must #include your system version and #define + HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else + +/* SVID2/XPG mallinfo structure */ + +struct mallinfo { + int arena; /* non-mmapped space allocated from system */ + int ordblks; /* number of free chunks */ + int smblks; /* number of fastbin blocks */ + int hblks; /* number of mmapped regions */ + int hblkhd; /* space in mmapped regions */ + int usmblks; /* maximum total allocated space */ + int fsmblks; /* space available in freed fastbin blocks */ + int uordblks; /* total allocated space */ + int fordblks; /* total free space */ + int keepcost; /* top-most, releasable (via malloc_trim) space */ +}; + +/* + SVID/XPG defines four standard parameter numbers for mallopt, + normally defined in malloc.h. Only one of these (M_MXFAST) is used + in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, + so setting them has no effect. But this malloc also supports other + options in mallopt described below. +*/ +#endif + + +/* ---------- description of public routines ------------ */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or null + if no space is available. Additionally, on failure, errno is + set to ENOMEM on ANSI C systems. + + If n is zero, malloc returns a minumum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit + systems.) On most systems, size_t is an unsigned type, so calls + with negative arguments are interpreted as requests for huge amounts + of space, which will often fail. The maximum supported value of n + differs across systems, but is in all cases less than the maximum + representable value of a size_t. +*/ +#if __STD_C +Void_t* public_mALLOc(size_t); +#else +Void_t* public_mALLOc(); +#endif + +/* + free(Void_t* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. It can have arbitrary (i.e., bad!) + effects if p has already been freed. + + Unless disabled (using mallopt), freeing very large spaces will + when possible, automatically trigger operations that give + back unused memory to the system, thus reducing program footprint. +*/ +#if __STD_C +void public_fREe(Void_t*); +#else +void public_fREe(); +#endif + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +#if __STD_C +Void_t* public_cALLOc(size_t, size_t); +#else +Void_t* public_cALLOc(); +#endif + +/* + realloc(Void_t* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p when possible, otherwise it employs the + equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. Unless the #define + REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of + zero (re)allocates a minimum-sized chunk. + + Large chunks that were internally obtained via mmap will always + be reallocated using malloc-copy-free sequences unless + the system supports MREMAP (currently only linux). + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +#if __STD_C +Void_t* public_rEALLOc(Void_t*, size_t); +#else +Void_t* public_rEALLOc(); +#endif + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +#if __STD_C +Void_t* public_mEMALIGn(size_t, size_t); +#else +Void_t* public_mEMALIGn(); +#endif + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +#if __STD_C +Void_t* public_vALLOc(size_t); +#else +Void_t* public_vALLOc(); +#endif + + + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. Only one of these (M_MXFAST) is used + in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, + so setting them has no effect. But this malloc also supports four + other options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_MXFAST 1 64 0-80 (0 disables fastbins) + M_TRIM_THRESHOLD -1 256*1024 any (-1U disables trimming) + M_TOP_PAD -2 0 any + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) + M_MMAP_MAX -4 65536 any (0 disables use of mmap) +*/ +#if __STD_C +int public_mALLOPt(int, int); +#else +int public_mALLOPt(); +#endif + + +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: the number of fastbin blocks (i.e., small chunks that + have been freed but not use resused or consolidated) + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: total bytes held in fastbin blocks + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +#if __STD_C +struct mallinfo public_mALLINFo(void); +#else +struct mallinfo public_mALLINFo(); +#endif + +/* + independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +#if __STD_C +Void_t** public_iCALLOc(size_t, size_t, Void_t**); +#else +Void_t** public_iCALLOc(); +#endif + +/* + independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comallac differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +#if __STD_C +Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**); +#else +Void_t** public_iCOMALLOc(); +#endif + + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +#if __STD_C +Void_t* public_pVALLOc(size_t); +#else +Void_t* public_pVALLOc(); +#endif + +/* + cfree(Void_t* p); + Equivalent to free(p). + + cfree is needed/defined on some systems that pair it with calloc, + for odd historical reasons (such as: cfree is used in example + code in the first edition of K&R). +*/ +#if __STD_C +void public_cFREe(Void_t*); +#else +void public_cFREe(); +#endif + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative + arguments to sbrk) if there is unused memory at the `high' end of + the malloc pool. You can call this after freeing large blocks of + memory to potentially reduce the system-level memory requirements + of a program. However, it cannot guarantee to reduce memory. Under + some allocation patterns, some large free blocks of memory will be + locked between two used chunks, so they cannot be given back to + the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, + only the minimum amount of memory to maintain internal data + structures will be left (one page or less). Non-zero arguments + can be supplied to maintain enough trailing space to service + future expected allocations without having to re-obtain memory + from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. + On systems that do not support "negative sbrks", it will always + rreturn 0. +*/ +#if __STD_C +int public_mTRIm(size_t); +#else +int public_mTRIm(); +#endif + +/* + malloc_usable_size(Void_t* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); + +*/ +#if __STD_C +size_t public_mUSABLe(Void_t*); +#else +size_t public_mUSABLe(); +#endif + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. + +*/ +#if __STD_C +void public_mSTATs(); +#else +void public_mSTATs(); +#endif + +/* mallopt tuning options */ + +/* + M_MXFAST is the maximum request size used for "fastbins", special bins + that hold returned chunks without consolidating their spaces. This + enables future requests for chunks of the same size to be handled + very quickly, but can increase fragmentation, and thus increase the + overall memory footprint of a program. + + This malloc manages fastbins very conservatively yet still + efficiently, so fragmentation is rarely a problem for values less + than or equal to the default. The maximum supported value of MXFAST + is 80. You wouldn't want it any higher than this anyway. Fastbins + are designed especially for use with many small structs, objects or + strings -- the default handles structs/objects/arrays with sizes up + to 16 4byte fields, or small strings representing words, tokens, + etc. Using fastbins for larger objects normally worsens + fragmentation without improving speed. + + M_MXFAST is set in REQUEST size units. It is internally used in + chunksize units, which adds padding and alignment. You can reduce + M_MXFAST to 0 to disable all use of fastbins. This causes the malloc + algorithm to be a closer approximation of fifo-best-fit in all cases, + not just for larger requests, but will generally cause it to be + slower. +*/ + + +/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ +#ifndef M_MXFAST +#define M_MXFAST 1 +#endif + +#ifndef DEFAULT_MXFAST +#define DEFAULT_MXFAST 64 +#endif + + +/* + M_TRIM_THRESHOLD is the maximum amount of unused top-most memory + to keep before releasing via malloc_trim in free(). + + Automatic trimming is mainly useful in long-lived programs. + Because trimming via sbrk can be slow on some systems, and can + sometimes be wasteful (in cases where programs immediately + afterward allocate more large chunks) the value should be high + enough so that your overall system performance would improve by + releasing this much memory. + + The trim threshold and the mmap control parameters (see below) + can be traded off with one another. Trimming and mmapping are + two different ways of releasing unused memory back to the + system. Between these two, it is often possible to keep + system-level demands of a long-lived program down to a bare + minimum. For example, in one test suite of sessions measuring + the XF86 X server on Linux, using a trim threshold of 128K and a + mmap threshold of 192K led to near-minimal long term resource + consumption. + + If you are using this malloc in a long-lived program, it should + pay to experiment with these values. As a rough guide, you + might set to a value close to the average size of a process + (program) running on your system. Releasing this much memory + would allow such a process to run in memory. Generally, it's + worth it to tune for trimming rather tham memory mapping when a + program undergoes phases where several large chunks are + allocated and released in ways that can reuse each other's + storage, perhaps mixed with phases where there are no such + chunks at all. And in well-behaved long-lived programs, + controlling release of large blocks via trimming versus mapping + is usually faster. + + However, in most programs, these parameters serve mainly as + protection against the system-level effects of carrying around + massive amounts of unneeded memory. Since frequent calls to + sbrk, mmap, and munmap otherwise degrade performance, the default + parameters are set to relatively high values that serve only as + safeguards. + + The trim value must be greater than page size to have any useful + effect. To disable trimming completely, you can set to + (unsigned long)(-1) + + Trim settings interact with fastbin (MXFAST) settings: Unless + TRIM_FASTBINS is defined, automatic trimming never takes place upon + freeing a chunk with size less than or equal to MXFAST. Trimming is + instead delayed until subsequent freeing of larger chunks. However, + you can still force an attempted trim by calling malloc_trim. + + Also, trimming is not generally possible in cases where + the main arena is obtained via mmap. + + Note that the trick some people use of mallocing a huge space and + then freeing it at program startup, in an attempt to reserve system + memory, doesn't have the intended effect under automatic trimming, + since that memory will immediately be returned to the system. +*/ + +#define M_TRIM_THRESHOLD -1 + +#ifndef DEFAULT_TRIM_THRESHOLD +#define DEFAULT_TRIM_THRESHOLD (256 * 1024) +#endif + +/* + M_TOP_PAD is the amount of extra `padding' space to allocate or + retain whenever sbrk is called. It is used in two ways internally: + + * When sbrk is called to extend the top of the arena to satisfy + a new malloc request, this much padding is added to the sbrk + request. + + * When malloc_trim is called automatically from free(), + it is used as the `pad' argument. + + In both cases, the actual amount of padding is rounded + so that the end of the arena is always a system page boundary. + + The main reason for using padding is to avoid calling sbrk so + often. Having even a small pad greatly reduces the likelihood + that nearly every malloc request during program start-up (or + after trimming) will invoke sbrk, which needlessly wastes + time. + + Automatic rounding-up to page-size units is normally sufficient + to avoid measurable overhead, so the default is 0. However, in + systems where sbrk is relatively slow, it can pay to increase + this value, at the expense of carrying around more memory than + the program needs. +*/ + +#define M_TOP_PAD -2 + +#ifndef DEFAULT_TOP_PAD +#define DEFAULT_TOP_PAD (0) +#endif + +/* + M_MMAP_THRESHOLD is the request size threshold for using mmap() + to service a request. Requests of at least this size that cannot + be allocated using already-existing space will be serviced via mmap. + (If enough normal freed space already exists it is used instead.) + + Using mmap segregates relatively large chunks of memory so that + they can be individually obtained and released from the host + system. A request serviced through mmap is never reused by any + other request (at least not directly; the system may just so + happen to remap successive requests to the same locations). + + Segregating space in this way has the benefits that: + + 1. Mmapped space can ALWAYS be individually released back + to the system, which helps keep the system level memory + demands of a long-lived program low. + 2. Mapped memory can never become `locked' between + other chunks, as can happen with normally allocated chunks, which + means that even trimming via malloc_trim would not release them. + 3. On some systems with "holes" in address spaces, mmap can obtain + memory that sbrk cannot. + + However, it has the disadvantages that: + + 1. The space cannot be reclaimed, consolidated, and then + used to service later requests, as happens with normal chunks. + 2. It can lead to more wastage because of mmap page alignment + requirements + 3. It causes malloc performance to be more dependent on host + system memory management support routines which may vary in + implementation quality and may impose arbitrary + limitations. Generally, servicing a request via normal + malloc steps is faster than going through a system's mmap. + + The advantages of mmap nearly always outweigh disadvantages for + "large" chunks, but the value of "large" varies across systems. The + default is an empirically derived value that works well in most + systems. +*/ + +#define M_MMAP_THRESHOLD -3 + +#ifndef DEFAULT_MMAP_THRESHOLD +#define DEFAULT_MMAP_THRESHOLD (256 * 1024) +#endif + +/* + M_MMAP_MAX is the maximum number of requests to simultaneously + service using mmap. This parameter exists because +. Some systems have a limited number of internal tables for + use by mmap, and using more than a few of them may degrade + performance. + + The default is set to a value that serves only as a safeguard. + Setting to 0 disables use of mmap for servicing large requests. If + HAVE_MMAP is not set, the default value is 0, and attempts to set it + to non-zero values in mallopt will fail. +*/ + +#define M_MMAP_MAX -4 + +#ifndef DEFAULT_MMAP_MAX +#if HAVE_MMAP +#define DEFAULT_MMAP_MAX (65536) +#else +#define DEFAULT_MMAP_MAX (0) +#endif +#endif + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + + +/* RN XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ +#endif + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc. + ======================================================================== +*/ + +/* #include "malloc.h" */ + +/* --------------------- public wrappers ---------------------- */ + +#ifdef USE_PUBLIC_MALLOC_WRAPPERS + +/* Declare all routines as internal */ +#if __STD_C +static Void_t* mALLOc(size_t); +static void fREe(Void_t*); +static Void_t* rEALLOc(Void_t*, size_t); +static Void_t* mEMALIGn(size_t, size_t); +static Void_t* vALLOc(size_t); +static Void_t* pVALLOc(size_t); +static Void_t* cALLOc(size_t, size_t); +static Void_t** iCALLOc(size_t, size_t, Void_t**); +static Void_t** iCOMALLOc(size_t, size_t*, Void_t**); +static void cFREe(Void_t*); +static int mTRIm(size_t); +static size_t mUSABLe(Void_t*); +static void mSTATs(); +static int mALLOPt(int, int); +static struct mallinfo mALLINFo(void); +#else +static Void_t* mALLOc(); +static void fREe(); +static Void_t* rEALLOc(); +static Void_t* mEMALIGn(); +static Void_t* vALLOc(); +static Void_t* pVALLOc(); +static Void_t* cALLOc(); +static Void_t** iCALLOc(); +static Void_t** iCOMALLOc(); +static void cFREe(); +static int mTRIm(); +static size_t mUSABLe(); +static void mSTATs(); +static int mALLOPt(); +static struct mallinfo mALLINFo(); +#endif + +/* + MALLOC_PREACTION and MALLOC_POSTACTION should be + defined to return 0 on success, and nonzero on failure. + The return value of MALLOC_POSTACTION is currently ignored + in wrapper functions since there is no reasonable default + action to take on failure. +*/ + + +#ifdef USE_MALLOC_LOCK + +#ifdef WIN32 + +static int mALLOC_MUTEx; +#define MALLOC_PREACTION slwait(&mALLOC_MUTEx) +#define MALLOC_POSTACTION slrelease(&mALLOC_MUTEx) + +#else + +#include + +static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER; + +#define MALLOC_PREACTION pthread_mutex_lock(&mALLOC_MUTEx) +#define MALLOC_POSTACTION pthread_mutex_unlock(&mALLOC_MUTEx) + +#endif /* USE_MALLOC_LOCK */ + +#else + +/* Substitute anything you like for these */ + +#define MALLOC_PREACTION (0) +#define MALLOC_POSTACTION (0) + +#endif + +Void_t* public_mALLOc(size_t bytes) { + Void_t* m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = mALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +void public_fREe(Void_t* m) { + if (MALLOC_PREACTION != 0) { + return; + } + fREe(m); + if (MALLOC_POSTACTION != 0) { + } +} + +Void_t* public_rEALLOc(Void_t* m, size_t bytes) { + if (MALLOC_PREACTION != 0) { + return 0; + } + m = rEALLOc(m, bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t* public_mEMALIGn(size_t alignment, size_t bytes) { + Void_t* m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = mEMALIGn(alignment, bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t* public_vALLOc(size_t bytes) { + Void_t* m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = vALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t* public_pVALLOc(size_t bytes) { + Void_t* m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = pVALLOc(bytes); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t* public_cALLOc(size_t n, size_t elem_size) { + Void_t* m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = cALLOc(n, elem_size); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + + +Void_t** public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks) { + Void_t** m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = iCALLOc(n, elem_size, chunks); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +Void_t** public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks) { + Void_t** m; + if (MALLOC_PREACTION != 0) { + return 0; + } + m = iCOMALLOc(n, sizes, chunks); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +void public_cFREe(Void_t* m) { + if (MALLOC_PREACTION != 0) { + return; + } + cFREe(m); + if (MALLOC_POSTACTION != 0) { + } +} + +int public_mTRIm(size_t s) { + int result; + if (MALLOC_PREACTION != 0) { + return 0; + } + result = mTRIm(s); + if (MALLOC_POSTACTION != 0) { + } + return result; +} + +size_t public_mUSABLe(Void_t* m) { + size_t result; + if (MALLOC_PREACTION != 0) { + return 0; + } + result = mUSABLe(m); + if (MALLOC_POSTACTION != 0) { + } + return result; +} + +void public_mSTATs() { + if (MALLOC_PREACTION != 0) { + return; + } + mSTATs(); + if (MALLOC_POSTACTION != 0) { + } +} + +struct mallinfo public_mALLINFo() { + struct mallinfo m; + if (MALLOC_PREACTION != 0) { + struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + return nm; + } + m = mALLINFo(); + if (MALLOC_POSTACTION != 0) { + } + return m; +} + +int public_mALLOPt(int p, int v) { + int result; + if (MALLOC_PREACTION != 0) { + return 0; + } + result = mALLOPt(p, v); + if (MALLOC_POSTACTION != 0) { + } + return result; +} + +#endif + + + +/* ------------- Optional versions of memcopy ---------------- */ + + +#if USE_MEMCPY + +/* + Note: memcpy is ONLY invoked with non-overlapping regions, + so the (usually slower) memmove is not needed. +*/ + +#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes) +#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes) + +#else /* !USE_MEMCPY */ + +/* Use Duff's device for good zeroing/copying performance. */ + +#define MALLOC_ZERO(charp, nbytes) \ +do { \ + INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp); \ + CHUNK_SIZE_T mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ + long mcn; \ + if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ + switch (mctmp) { \ + case 0: for(;;) { *mzp++ = 0; \ + case 7: *mzp++ = 0; \ + case 6: *mzp++ = 0; \ + case 5: *mzp++ = 0; \ + case 4: *mzp++ = 0; \ + case 3: *mzp++ = 0; \ + case 2: *mzp++ = 0; \ + case 1: *mzp++ = 0; if(mcn <= 0) break; mcn--; } \ + } \ +} while(0) + +#define MALLOC_COPY(dest,src,nbytes) \ +do { \ + INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src; \ + INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest; \ + CHUNK_SIZE_T mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ + long mcn; \ + if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ + switch (mctmp) { \ + case 0: for(;;) { *mcdst++ = *mcsrc++; \ + case 7: *mcdst++ = *mcsrc++; \ + case 6: *mcdst++ = *mcsrc++; \ + case 5: *mcdst++ = *mcsrc++; \ + case 4: *mcdst++ = *mcsrc++; \ + case 3: *mcdst++ = *mcsrc++; \ + case 2: *mcdst++ = *mcsrc++; \ + case 1: *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; } \ + } \ +} while(0) + +#endif + +/* ------------------ MMAP support ------------------ */ + + +#if HAVE_MMAP + +#ifndef LACKS_FCNTL_H +#include +#endif + +#ifndef LACKS_SYS_MMAN_H +#include +#endif + +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* + Nearly all versions of mmap support MAP_ANONYMOUS, + so the following is unlikely to be needed, but is + supplied just in case. +*/ + +#ifndef MAP_ANONYMOUS + +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ + +#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \ + mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) + +#else + +#define MMAP(addr, size, prot, flags) \ + (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0)) + +#endif + + +#endif /* HAVE_MMAP */ + + +/* + ----------------------- Chunk representations ----------------------- +*/ + + +/* + This struct declaration is misleading (but accurate and necessary). + It declares a "view" into memory allowing access to necessary + fields at known offsets from a given base. See explanation below. +*/ + +struct malloc_chunk { + + INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */ + INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */ + + struct malloc_chunk* fd; /* double links -- used only if free. */ + struct malloc_chunk* bk; +}; + + +typedef struct malloc_chunk* mchunkptr; + +/* + malloc_chunk details: + + (The following includes lightly edited explanations by Colin Plumb.) + + Chunks of memory are maintained using a `boundary tag' method as + described in e.g., Knuth or Standish. (See the paper by Paul + Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a + survey of such techniques.) Sizes of free chunks are stored both + in the front of each chunk and at the end. This makes + consolidating fragmented chunks into bigger chunks very fast. The + size fields also hold bits representing whether chunks are free or + in use. + + An allocated chunk looks like this: + + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk, if allocated | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | User data starts here... . + . . + . (malloc_usable_space() bytes) . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + Where "chunk" is the front of the chunk for the purpose of most of + the malloc code, but "mem" is the pointer that is returned to the + user. "Nextchunk" is the beginning of the next contiguous chunk. + + Chunks always begin on even word boundries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + Free chunks are stored in circular doubly-linked lists, and look like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The P (PREV_INUSE) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, + preventing access to non-existent (or non-owned) memory. If + prev_inuse is set for any given chunk, then you CANNOT determine + the size of the previous chunk, and might even get a memory + addressing fault when trying to do so. + + Note that the `foot' of the current chunk is actually represented + as the prev_size of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The two exceptions to all this are + + 1. The special chunk `top' doesn't bother using the + trailing size field since there is no next contiguous chunk + that would have to index off it. After initialization, `top' + is forced to always exist. If it would become less than + MINSIZE bytes long, it is replenished. + + 2. Chunks allocated via mmap, which have the second-lowest-order + bit (IS_MMAPPED) set in their size fields. Because they are + allocated one-by-one, each must contain its own trailing size field. + +*/ + +/* + ---------- Size and alignment checks and conversions ---------- +*/ + +/* conversion from malloc headers to user pointers, and back */ + +#define chunk2mem(p) ((Void_t*)((char*)(p) + 2*SIZE_SZ)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ)) + +/* The smallest possible chunk */ +#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) + +/* The smallest size we can malloc is an aligned minimal chunk */ + +#define MINSIZE \ + (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) + +/* Check if m has acceptable alignment */ + +#define aligned_OK(m) (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0) + + +/* + Check if a request is so large that it would wrap around zero when + padded and aligned. To simplify some other code, the bound is made + low enough so that adding MINSIZE will also not wrap around sero. +*/ + +#define REQUEST_OUT_OF_RANGE(req) \ + ((CHUNK_SIZE_T)(req) >= \ + (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE)) + +/* pad request bytes into a usable size -- internal version */ + +#define request2size(req) \ + (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \ + MINSIZE : \ + ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) + +/* Same, except also perform argument check */ + +#define checked_request2size(req, sz) \ + if (REQUEST_OUT_OF_RANGE(req)) { \ + MALLOC_FAILURE_ACTION; \ + return 0; \ + } \ + (sz) = request2size(req); + +/* + --------------- Physical chunk operations --------------- +*/ + + +/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ +#define PREV_INUSE 0x1 + +/* extract inuse bit of previous chunk */ +#define prev_inuse(p) ((p)->size & PREV_INUSE) + + +/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ +#define IS_MMAPPED 0x2 + +/* check for mmap()'ed chunk */ +#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) + +/* + Bits to mask off when extracting size + + Note: IS_MMAPPED is intentionally not masked off from size field in + macros for which mmapped chunks should never be seen. This should + cause helpful core dumps to occur if it is tried by accident by + people extending or adapting this malloc. +*/ +#define SIZE_BITS (PREV_INUSE|IS_MMAPPED) + +/* Get size, ignoring use bits */ +#define chunksize(p) ((p)->size & ~(SIZE_BITS)) + + +/* Ptr to next physical malloc_chunk. */ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) )) + +/* Ptr to previous physical malloc_chunk */ +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) )) + +/* Treat space at ptr + offset as a chunk */ +#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) + +/* extract p's inuse bit */ +#define inuse(p)\ +((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE) + +/* set/clear chunk as being inuse without otherwise disturbing */ +#define set_inuse(p)\ +((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE + +#define clear_inuse(p)\ +((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE) + + +/* check/set/clear inuse bits in known places */ +#define inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE) + +#define set_inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE) + +#define clear_inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE)) + + +/* Set size at head, without disturbing its use bit */ +#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s))) + +/* Set size/use field */ +#define set_head(p, s) ((p)->size = (s)) + +/* Set size at footer (only when chunk is not in use) */ +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s)) + + +/* + -------------------- Internal data structures -------------------- + + All internal state is held in an instance of malloc_state defined + below. There are no other static variables, except in two optional + cases: + * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. + * If HAVE_MMAP is true, but mmap doesn't support + MAP_ANONYMOUS, a dummy file descriptor for mmap. + + Beware of lots of tricks that minimize the total bookkeeping space + requirements. The result is a little over 1K bytes (for 4byte + pointers and size_t.) +*/ + +/* + Bins + + An array of bin headers for free chunks. Each bin is doubly + linked. The bins are approximately proportionally (log) spaced. + There are a lot of these bins (128). This may look excessive, but + works very well in practice. Most bins hold sizes that are + unusual as malloc request sizes, but are more usual for fragments + and consolidated sets of chunks, which is what these bins hold, so + they can be found quickly. All procedures maintain the invariant + that no consolidated chunk physically borders another one, so each + chunk in a list is known to be preceeded and followed by either + inuse chunks or the ends of memory. + + Chunks in bins are kept in size order, with ties going to the + approximately least recently used chunk. Ordering isn't needed + for the small bins, which all contain the same-sized chunks, but + facilitates best-fit allocation for larger chunks. These lists + are just sequential. Keeping them in order almost never requires + enough traversal to warrant using fancier ordered data + structures. + + Chunks of the same size are linked with the most + recently freed at the front, and allocations are taken from the + back. This results in LRU (FIFO) allocation order, which tends + to give each chunk an equal opportunity to be consolidated with + adjacent freed chunks, resulting in larger free chunks and less + fragmentation. + + To simplify use in double-linked lists, each bin header acts + as a malloc_chunk. This avoids special-casing for headers. + But to conserve space and improve locality, we allocate + only the fd/bk pointers of bins, and then use repositioning tricks + to treat these as the fields of a malloc_chunk*. +*/ + +typedef struct malloc_chunk* mbinptr; + +/* addressing -- note that bin_at(0) does not exist */ +#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1))) + +/* analog of ++bin */ +#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) + +/* Reminders about list directionality within bins */ +#define first(b) ((b)->fd) +#define last(b) ((b)->bk) + +/* Take a chunk off a bin list */ +#define unlink(P, BK, FD) { \ + FD = P->fd; \ + BK = P->bk; \ + FD->bk = BK; \ + BK->fd = FD; \ +} + +/* + Indexing + + Bins for sizes < 512 bytes contain chunks of all the same size, spaced + 8 bytes apart. Larger bins are approximately logarithmically spaced: + + 64 bins of size 8 + 32 bins of size 64 + 16 bins of size 512 + 8 bins of size 4096 + 4 bins of size 32768 + 2 bins of size 262144 + 1 bin of size what's left + + The bins top out around 1MB because we expect to service large + requests via mmap. +*/ + +#define NBINS 96 +#define NSMALLBINS 32 +#define SMALLBIN_WIDTH 8 +#define MIN_LARGE_SIZE 256 + +#define in_smallbin_range(sz) \ + ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE) + +#define smallbin_index(sz) (((unsigned)(sz)) >> 3) + +/* + Compute index for size. We expect this to be inlined when + compiled with optimization, else not, which works out well. +*/ +static int largebin_index(unsigned int sz) { + unsigned int x = sz >> SMALLBIN_WIDTH; + unsigned int m; /* bit position of highest set bit of m */ + + if (x >= 0x10000) return NBINS-1; + + /* On intel, use BSRL instruction to find highest bit */ +#if defined(__GNUC__) && defined(i386) + + __asm__("bsrl %1,%0\n\t" + : "=r" (m) + : "g" (x)); + +#else + { + /* + Based on branch-free nlz algorithm in chapter 5 of Henry + S. Warren Jr's book "Hacker's Delight". + */ + + unsigned int n = ((x - 0x100) >> 16) & 8; + x <<= n; + m = ((x - 0x1000) >> 16) & 4; + n += m; + x <<= m; + m = ((x - 0x4000) >> 16) & 2; + n += m; + x = (x << m) >> 14; + m = 13 - n + (x & ~(x>>1)); + } +#endif + + /* Use next 2 bits to create finer-granularity bins */ + return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3); +} + +#define bin_index(sz) \ + ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) + +/* + FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the + first bin that is maintained in sorted order. This must + be the smallest size corresponding to a given bin. + + Normally, this should be MIN_LARGE_SIZE. But you can weaken + best fit guarantees to sometimes speed up malloc by increasing value. + Doing this means that malloc may choose a chunk that is + non-best-fitting by up to the width of the bin. + + Some useful cutoff values: + 512 - all bins sorted + 2560 - leaves bins <= 64 bytes wide unsorted + 12288 - leaves bins <= 512 bytes wide unsorted + 65536 - leaves bins <= 4096 bytes wide unsorted + 262144 - leaves bins <= 32768 bytes wide unsorted + -1 - no bins sorted (not recommended!) +*/ + +#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE +/* #define FIRST_SORTED_BIN_SIZE 65536 */ + +/* + Unsorted chunks + + All remainders from chunk splits, as well as all returned chunks, + are first placed in the "unsorted" bin. They are then placed + in regular bins after malloc gives them ONE chance to be used before + binning. So, basically, the unsorted_chunks list acts as a queue, + with chunks being placed on it in free (and malloc_consolidate), + and taken off (to be either used or placed in bins) in malloc. +*/ + +/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ +#define unsorted_chunks(M) (bin_at(M, 1)) + +/* + Top + + The top-most available chunk (i.e., the one bordering the end of + available memory) is treated specially. It is never included in + any bin, is used only if no other chunk is available, and is + released back to the system if it is very large (see + M_TRIM_THRESHOLD). Because top initially + points to its own bin with initial zero size, thus forcing + extension on the first malloc request, we avoid having any special + code in malloc to check whether it even exists yet. But we still + need to do so when getting memory from system, so we make + initial_top treat the bin as a legal but unusable chunk during the + interval between initialization and the first call to + sYSMALLOc. (This is somewhat delicate, since it relies on + the 2 preceding words to be zero during this interval as well.) +*/ + +/* Conveniently, the unsorted bin can be used as dummy top on first call */ +#define initial_top(M) (unsorted_chunks(M)) + +/* + Binmap + + To help compensate for the large number of bins, a one-level index + structure is used for bin-by-bin searching. `binmap' is a + bitvector recording whether bins are definitely empty so they can + be skipped over during during traversals. The bits are NOT always + cleared as soon as bins are empty, but instead only + when they are noticed to be empty during traversal in malloc. +*/ + +/* Conservatively use 32 bits per map word, even if on 64bit system */ +#define BINMAPSHIFT 5 +#define BITSPERMAP (1U << BINMAPSHIFT) +#define BINMAPSIZE (NBINS / BITSPERMAP) + +#define idx2block(i) ((i) >> BINMAPSHIFT) +#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1)))) + +#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) +#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) +#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i)) + +/* + Fastbins + + An array of lists holding recently freed small chunks. Fastbins + are not doubly linked. It is faster to single-link them, and + since chunks are never removed from the middles of these lists, + double linking is not necessary. Also, unlike regular bins, they + are not even processed in FIFO order (they use faster LIFO) since + ordering doesn't much matter in the transient contexts in which + fastbins are normally used. + + Chunks in fastbins keep their inuse bit set, so they cannot + be consolidated with other free chunks. malloc_consolidate + releases all chunks in fastbins and consolidates them with + other free chunks. +*/ + +typedef struct malloc_chunk* mfastbinptr; + +/* offset 2 to use otherwise unindexable first 2 bins */ +#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) + +/* The maximum fastbin request size we support */ +#define MAX_FAST_SIZE 80 + +#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1) + +/* + FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() + that triggers automatic consolidation of possibly-surrounding + fastbin chunks. This is a heuristic, so the exact value should not + matter too much. It is defined at half the default trim threshold as a + compromise heuristic to only attempt consolidation if it is likely + to lead to trimming. However, it is not dynamically tunable, since + consolidation reduces fragmentation surrounding loarge chunks even + if trimming is not used. +*/ + +#define FASTBIN_CONSOLIDATION_THRESHOLD \ + ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1) + +/* + Since the lowest 2 bits in max_fast don't matter in size comparisons, + they are used as flags. +*/ + +/* + ANYCHUNKS_BIT held in max_fast indicates that there may be any + freed chunks at all. It is set true when entering a chunk into any + bin. +*/ + +#define ANYCHUNKS_BIT (1U) + +#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT)) +#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT) +#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT) + +/* + FASTCHUNKS_BIT held in max_fast indicates that there are probably + some fastbin chunks. It is set true on entering a chunk into any + fastbin, and cleared only in malloc_consolidate. +*/ + +#define FASTCHUNKS_BIT (2U) + +#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT)) +#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT|ANYCHUNKS_BIT)) +#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT)) + +/* + Set value of max_fast. + Use impossibly small value if 0. +*/ + +#define set_max_fast(M, s) \ + (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ + ((M)->max_fast & (FASTCHUNKS_BIT|ANYCHUNKS_BIT)) + +#define get_max_fast(M) \ + ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT)) + + +/* + morecore_properties is a status word holding dynamically discovered + or controlled properties of the morecore function +*/ + +#define MORECORE_CONTIGUOUS_BIT (1U) + +#define contiguous(M) \ + (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT)) +#define noncontiguous(M) \ + (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0) +#define set_contiguous(M) \ + ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT) +#define set_noncontiguous(M) \ + ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT) + + +/* + ----------- Internal state representation and initialization ----------- +*/ + +struct malloc_state { + + /* The maximum chunk size to be eligible for fastbin */ + INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ + + /* Fastbins */ + mfastbinptr fastbins[NFASTBINS]; + + /* Base of the topmost chunk -- not otherwise kept in a bin */ + mchunkptr top; + + /* The remainder from the most recent split of a small request */ + mchunkptr last_remainder; + + /* Normal bins packed as described above */ + mchunkptr bins[NBINS * 2]; + + /* Bitmap of bins. Trailing zero map handles cases of largest binned size */ + unsigned int binmap[BINMAPSIZE+1]; + + /* Tunable parameters */ + CHUNK_SIZE_T trim_threshold; + INTERNAL_SIZE_T top_pad; + INTERNAL_SIZE_T mmap_threshold; + + /* Memory map support */ + int n_mmaps; + int n_mmaps_max; + int max_n_mmaps; + + /* Cache malloc_getpagesize */ + unsigned int pagesize; + + /* Track properties of MORECORE */ + unsigned int morecore_properties; + + /* Statistics */ + INTERNAL_SIZE_T mmapped_mem; + INTERNAL_SIZE_T sbrked_mem; + INTERNAL_SIZE_T max_sbrked_mem; + INTERNAL_SIZE_T max_mmapped_mem; + INTERNAL_SIZE_T max_total_mem; +}; + +typedef struct malloc_state *mstate; + +/* + There is exactly one instance of this struct in this malloc. + If you are adapting this malloc in a way that does NOT use a static + malloc_state, you MUST explicitly zero-fill it before using. This + malloc relies on the property that malloc_state is initialized to + all zeroes (as is true of C statics). +*/ + +static struct malloc_state av_; /* never directly referenced */ + +/* + All uses of av_ are via get_malloc_state(). + At most one "call" to get_malloc_state is made per invocation of + the public versions of malloc and free, but other routines + that in turn invoke malloc and/or free may call more then once. + Also, it is called in check* routines if DEBUG is set. +*/ + +#define get_malloc_state() (&(av_)) + +/* + Initialize a malloc_state struct. + + This is called only from within malloc_consolidate, which needs + be called in the same contexts anyway. It is never called directly + outside of malloc_consolidate because some optimizing compilers try + to inline it at all call points, which turns out not to be an + optimization at all. (Inlining it in malloc_consolidate is fine though.) +*/ + +#if __STD_C +static void malloc_init_state(mstate av) +#else +static void malloc_init_state(av) mstate av; +#endif +{ + int i; + mbinptr bin; + + /* Establish circular links for normal bins */ + for (i = 1; i < NBINS; ++i) { + bin = bin_at(av,i); + bin->fd = bin->bk = bin; + } + + av->top_pad = DEFAULT_TOP_PAD; + av->n_mmaps_max = DEFAULT_MMAP_MAX; + av->mmap_threshold = DEFAULT_MMAP_THRESHOLD; + av->trim_threshold = DEFAULT_TRIM_THRESHOLD; + +#if MORECORE_CONTIGUOUS + set_contiguous(av); +#else + set_noncontiguous(av); +#endif + + + set_max_fast(av, DEFAULT_MXFAST); + + av->top = initial_top(av); + av->pagesize = malloc_getpagesize; +} + +/* + Other internal utilities operating on mstates +*/ + +#if __STD_C +static Void_t* sYSMALLOc(INTERNAL_SIZE_T, mstate); +static int sYSTRIm(size_t, mstate); +static void malloc_consolidate(mstate); +static Void_t** iALLOc(size_t, size_t*, int, Void_t**); +#else +static Void_t* sYSMALLOc(); +static int sYSTRIm(); +static void malloc_consolidate(); +static Void_t** iALLOc(); +#endif + +/* + Debugging support + + These routines make a number of assertions about the states + of data structures that should be true at all times. If any + are not true, it's very likely that a user program has somehow + trashed memory. (It's also possible that there is a coding error + in malloc. In which case, please report it!) +*/ + +#if ! DEBUG + +#define check_chunk(P) +#define check_free_chunk(P) +#define check_inuse_chunk(P) +#define check_remalloced_chunk(P,N) +#define check_malloced_chunk(P,N) +#define check_malloc_state() + +#else +#define check_chunk(P) do_check_chunk(P) +#define check_free_chunk(P) do_check_free_chunk(P) +#define check_inuse_chunk(P) do_check_inuse_chunk(P) +#define check_remalloced_chunk(P,N) do_check_remalloced_chunk(P,N) +#define check_malloced_chunk(P,N) do_check_malloced_chunk(P,N) +#define check_malloc_state() do_check_malloc_state() + +/* + Properties of all chunks +*/ + +#if __STD_C +static void do_check_chunk(mchunkptr p) +#else +static void do_check_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + CHUNK_SIZE_T sz = chunksize(p); + /* min and max possible addresses assuming contiguous allocation */ + char* max_address = (char*)(av->top) + chunksize(av->top); + char* min_address = max_address - av->sbrked_mem; + + if (!chunk_is_mmapped(p)) { + + /* Has legal address ... */ + if (p != av->top) { + if (contiguous(av)) { + assert(((char*)p) >= min_address); + assert(((char*)p + sz) <= ((char*)(av->top))); + } + } + else { + /* top size is always at least MINSIZE */ + assert((CHUNK_SIZE_T)(sz) >= MINSIZE); + /* top predecessor always marked inuse */ + assert(prev_inuse(p)); + } + + } + else { +#if HAVE_MMAP + /* address is outside main heap */ + if (contiguous(av) && av->top != initial_top(av)) { + assert(((char*)p) < min_address || ((char*)p) > max_address); + } + /* chunk is page-aligned */ + assert(((p->prev_size + sz) & (av->pagesize-1)) == 0); + /* mem is aligned */ + assert(aligned_OK(chunk2mem(p))); +#else + /* force an appropriate assert violation if debug set */ + assert(!chunk_is_mmapped(p)); +#endif + } +} + +/* + Properties of free chunks +*/ + +#if __STD_C +static void do_check_free_chunk(mchunkptr p) +#else +static void do_check_free_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE; + mchunkptr next = chunk_at_offset(p, sz); + + do_check_chunk(p); + + /* Chunk must claim to be free ... */ + assert(!inuse(p)); + assert (!chunk_is_mmapped(p)); + + /* Unless a special marker, must have OK fields */ + if ((CHUNK_SIZE_T)(sz) >= MINSIZE) + { + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert(aligned_OK(chunk2mem(p))); + /* ... matching footer field */ + assert(next->prev_size == sz); + /* ... and is fully consolidated */ + assert(prev_inuse(p)); + assert (next == av->top || inuse(next)); + + /* ... and has minimally sane links */ + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_SZ */ + assert(sz == SIZE_SZ); +} + +/* + Properties of inuse chunks +*/ + +#if __STD_C +static void do_check_inuse_chunk(mchunkptr p) +#else +static void do_check_inuse_chunk(p) mchunkptr p; +#endif +{ + mstate av = get_malloc_state(); + mchunkptr next; + do_check_chunk(p); + + if (chunk_is_mmapped(p)) + return; /* mmapped chunks have no next/prev */ + + /* Check whether it claims to be in use ... */ + assert(inuse(p)); + + next = next_chunk(p); + + /* ... and is surrounded by OK chunks. + Since more things can be checked with free chunks than inuse ones, + if an inuse chunk borders them and debug is on, it's worth doing them. + */ + if (!prev_inuse(p)) { + /* Note that we cannot even look at prev unless it is not inuse */ + mchunkptr prv = prev_chunk(p); + assert(next_chunk(prv) == p); + do_check_free_chunk(prv); + } + + if (next == av->top) { + assert(prev_inuse(next)); + assert(chunksize(next) >= MINSIZE); + } + else if (!inuse(next)) + do_check_free_chunk(next); +} + +/* + Properties of chunks recycled from fastbins +*/ + +#if __STD_C +static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_remalloced_chunk(p, s) mchunkptr p; INTERNAL_SIZE_T s; +#endif +{ + INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE; + + do_check_inuse_chunk(p); + + /* Legal size ... */ + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert((CHUNK_SIZE_T)(sz) >= MINSIZE); + /* ... and alignment */ + assert(aligned_OK(chunk2mem(p))); + /* chunk is less than MINSIZE more than request */ + assert((long)(sz) - (long)(s) >= 0); + assert((long)(sz) - (long)(s + MINSIZE) < 0); +} + +/* + Properties of nonrecycled chunks at the point they are malloced +*/ + +#if __STD_C +static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_malloced_chunk(p, s) mchunkptr p; INTERNAL_SIZE_T s; +#endif +{ + /* same as recycled case ... */ + do_check_remalloced_chunk(p, s); + + /* + ... plus, must obey implementation invariant that prev_inuse is + always true of any allocated chunk; i.e., that each allocated + chunk borders either a previously allocated and still in-use + chunk, or the base of its memory arena. This is ensured + by making all allocations from the the `lowest' part of any found + chunk. This does not necessarily hold however for chunks + recycled via fastbins. + */ + + assert(prev_inuse(p)); +} + + +/* + Properties of malloc_state. + + This may be useful for debugging malloc, as well as detecting user + programmer errors that somehow write into malloc_state. + + If you are extending or experimenting with this malloc, you can + probably figure out how to hack this routine to print out or + display chunk addresses, sizes, bins, and other instrumentation. +*/ + +static void do_check_malloc_state() +{ + mstate av = get_malloc_state(); + int i; + mchunkptr p; + mchunkptr q; + mbinptr b; + unsigned int binbit; + int empty; + unsigned int idx; + INTERNAL_SIZE_T size; + CHUNK_SIZE_T total = 0; + int max_fast_bin; + + /* internal size_t must be no wider than pointer type */ + assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*)); + + /* alignment is a power of 2 */ + assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0); + + /* cannot run remaining checks until fully initialized */ + if (av->top == 0 || av->top == initial_top(av)) + return; + + /* pagesize is a power of 2 */ + assert((av->pagesize & (av->pagesize-1)) == 0); + + /* properties of fastbins */ + + /* max_fast is in allowed range */ + assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE)); + + max_fast_bin = fastbin_index(av->max_fast); + + for (i = 0; i < NFASTBINS; ++i) { + p = av->fastbins[i]; + + /* all bins past max_fast are empty */ + if (i > max_fast_bin) + assert(p == 0); + + while (p != 0) { + /* each chunk claims to be inuse */ + do_check_inuse_chunk(p); + total += chunksize(p); + /* chunk belongs in this bin */ + assert(fastbin_index(chunksize(p)) == i); + p = p->fd; + } + } + + if (total != 0) + assert(have_fastchunks(av)); + else if (!have_fastchunks(av)) + assert(total == 0); + + /* check normal bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av,i); + + /* binmap is accurate (except for bin 1 == unsorted_chunks) */ + if (i >= 2) { + binbit = get_binmap(av,i); + empty = last(b) == b; + if (!binbit) + assert(empty); + else if (!empty) + assert(binbit); + } + + for (p = last(b); p != b; p = p->bk) { + /* each chunk claims to be free */ + do_check_free_chunk(p); + size = chunksize(p); + total += size; + if (i >= 2) { + /* chunk belongs in bin */ + idx = bin_index(size); + assert(idx == i); + /* lists are sorted */ + if ((CHUNK_SIZE_T) size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { + assert(p->bk == b || + (CHUNK_SIZE_T)chunksize(p->bk) >= + (CHUNK_SIZE_T)chunksize(p)); + } + } + /* chunk is followed by a legal chain of inuse chunks */ + for (q = next_chunk(p); + (q != av->top && inuse(q) && + (CHUNK_SIZE_T)(chunksize(q)) >= MINSIZE); + q = next_chunk(q)) + do_check_inuse_chunk(q); + } + } + + /* top chunk is OK */ + check_chunk(av->top); + + /* sanity checks for statistics */ + + assert(total <= (CHUNK_SIZE_T)(av->max_total_mem)); + assert(av->n_mmaps >= 0); + assert(av->n_mmaps <= av->max_n_mmaps); + + assert((CHUNK_SIZE_T)(av->sbrked_mem) <= + (CHUNK_SIZE_T)(av->max_sbrked_mem)); + + assert((CHUNK_SIZE_T)(av->mmapped_mem) <= + (CHUNK_SIZE_T)(av->max_mmapped_mem)); + + assert((CHUNK_SIZE_T)(av->max_total_mem) >= + (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem)); +} +#endif + + +/* ----------- Routines dealing with system allocation -------------- */ + +/* + sysmalloc handles malloc cases requiring more memory from the system. + On entry, it is assumed that av->top does not have enough + space to service request for nb bytes, thus requiring that av->top + be extended or replaced. +*/ + +#if __STD_C +static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av) +#else +static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; +#endif +{ + mchunkptr old_top; /* incoming value of av->top */ + INTERNAL_SIZE_T old_size; /* its size */ + char* old_end; /* its end address */ + + long size; /* arg to first MORECORE or mmap call */ + char* brk; /* return value from MORECORE */ + + long correction; /* arg to 2nd MORECORE call */ + char* snd_brk; /* 2nd return val */ + + INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ + INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ + char* aligned_brk; /* aligned offset into brk */ + + mchunkptr p; /* the allocated/returned chunk */ + mchunkptr remainder; /* remainder from allocation */ + CHUNK_SIZE_T remainder_size; /* its size */ + + CHUNK_SIZE_T sum; /* for updating stats */ + + size_t pagemask = av->pagesize - 1; + + /* + If there is space available in fastbins, consolidate and retry + malloc from scratch rather than getting memory from system. This + can occur only if nb is in smallbin range so we didn't consolidate + upon entry to malloc. It is much easier to handle this case here + than in malloc proper. + */ + + if (have_fastchunks(av)) { + assert(in_smallbin_range(nb)); + malloc_consolidate(av); + return mALLOc(nb - MALLOC_ALIGN_MASK); + } + + +#if HAVE_MMAP + + /* + If have mmap, and the request size meets the mmap threshold, and + the system supports mmap, and there are few enough currently + allocated mmapped regions, try to directly map this request + rather than expanding top. + */ + + if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) && + (av->n_mmaps < av->n_mmaps_max)) { + + char* mm; /* return value from mmap call*/ + + /* + Round up size to nearest page. For mmapped chunks, the overhead + is one SIZE_SZ unit larger than for normal chunks, because there + is no following chunk whose prev_size field could be used. + */ + size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask; + + /* Don't try if size wraps around 0 */ + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) { + + mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE)); + + if (mm != (char*)(MORECORE_FAILURE)) { + + /* + The offset to the start of the mmapped region is stored + in the prev_size field of the chunk. This allows us to adjust + returned start address to meet alignment requirements here + and in memalign(), and still be able to compute proper + address argument for later munmap in free() and realloc(). + */ + + front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK; + if (front_misalign > 0) { + correction = MALLOC_ALIGNMENT - front_misalign; + p = (mchunkptr)(mm + correction); + p->prev_size = correction; + set_head(p, (size - correction) |IS_MMAPPED); + } + else { + p = (mchunkptr)mm; + p->prev_size = 0; + set_head(p, size|IS_MMAPPED); + } + + /* update statistics */ + + if (++av->n_mmaps > av->max_n_mmaps) + av->max_n_mmaps = av->n_mmaps; + + sum = av->mmapped_mem += size; + if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) + av->max_mmapped_mem = sum; + sum += av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) + av->max_total_mem = sum; + + check_chunk(p); + + return chunk2mem(p); + } + } + } +#endif + + /* Record incoming configuration of top */ + + old_top = av->top; + old_size = chunksize(old_top); + old_end = (char*)(chunk_at_offset(old_top, old_size)); + + brk = snd_brk = (char*)(MORECORE_FAILURE); + + /* + If not the first time through, we require old_size to be + at least MINSIZE and to have prev_inuse set. + */ + + assert((old_top == initial_top(av) && old_size == 0) || + ((CHUNK_SIZE_T) (old_size) >= MINSIZE && + prev_inuse(old_top))); + + /* Precondition: not enough current space to satisfy nb request */ + assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE)); + + /* Precondition: all fastbins are consolidated */ + assert(!have_fastchunks(av)); + + + /* Request enough space for nb + pad + overhead */ + + size = nb + av->top_pad + MINSIZE; + + /* + If contiguous, we can subtract out existing space that we hope to + combine with new space. We add it back later only if + we don't actually get contiguous space. + */ + + if (contiguous(av)) + size -= old_size; + + /* + Round to a multiple of page size. + If MORECORE is not contiguous, this ensures that we only call it + with whole-page arguments. And if MORECORE is contiguous and + this is not first time through, this preserves page-alignment of + previous calls. Otherwise, we correct to page-align below. + */ + + size = (size + pagemask) & ~pagemask; + + /* + Don't try to call MORECORE if argument is so big as to appear + negative. Note that since mmap takes size_t arg, it may succeed + below even if we cannot call MORECORE. + */ + + if (size > 0) + brk = (char*)(MORECORE(size)); + + /* + If have mmap, try using it as a backup when MORECORE fails or + cannot be used. This is worth doing on systems that have "holes" in + address space, so sbrk cannot extend to give contiguous space, but + space is available elsewhere. Note that we ignore mmap max count + and threshold limits, since the space will not be used as a + segregated mmap region. + */ + +#if HAVE_MMAP + if (brk == (char*)(MORECORE_FAILURE)) { + + /* Cannot merge with old top, so add its size back in */ + if (contiguous(av)) + size = (size + old_size + pagemask) & ~pagemask; + + /* If we are relying on mmap as backup, then use larger units */ + if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE)) + size = MMAP_AS_MORECORE_SIZE; + + /* Don't try if size wraps around 0 */ + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) { + + brk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE)); + + if (brk != (char*)(MORECORE_FAILURE)) { + + /* We do not need, and cannot use, another sbrk call to find end */ + snd_brk = brk + size; + + /* + Record that we no longer have a contiguous sbrk region. + After the first time mmap is used as backup, we do not + ever rely on contiguous space since this could incorrectly + bridge regions. + */ + set_noncontiguous(av); + } + } + } +#endif + + if (brk != (char*)(MORECORE_FAILURE)) { + av->sbrked_mem += size; + + /* + If MORECORE extends previous space, we can likewise extend top size. + */ + + if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE)) { + set_head(old_top, (size + old_size) | PREV_INUSE); + } + + /* + Otherwise, make adjustments: + + * If the first time through or noncontiguous, we need to call sbrk + just to find out where the end of memory lies. + + * We need to ensure that all returned chunks from malloc will meet + MALLOC_ALIGNMENT + + * If there was an intervening foreign sbrk, we need to adjust sbrk + request size to account for fact that we will not be able to + combine new space with existing space in old_top. + + * Almost all systems internally allocate whole pages at a time, in + which case we might as well use the whole last page of request. + So we allocate enough more memory to hit a page boundary now, + which in turn causes future contiguous calls to page-align. + */ + + else { + front_misalign = 0; + end_misalign = 0; + correction = 0; + aligned_brk = brk; + + /* + If MORECORE returns an address lower than we have seen before, + we know it isn't really contiguous. This and some subsequent + checks help cope with non-conforming MORECORE functions and + the presence of "foreign" calls to MORECORE from outside of + malloc or by other threads. We cannot guarantee to detect + these in all cases, but cope with the ones we do detect. + */ + if (contiguous(av) && old_size != 0 && brk < old_end) { + set_noncontiguous(av); + } + + /* handle contiguous cases */ + if (contiguous(av)) { + + /* + We can tolerate forward non-contiguities here (usually due + to foreign calls) but treat them as part of our space for + stats reporting. + */ + if (old_size != 0) + av->sbrked_mem += brk - old_end; + + /* Guarantee alignment of first new chunk made from this space */ + + front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; + if (front_misalign > 0) { + + /* + Skip over some bytes to arrive at an aligned position. + We don't need to specially mark these wasted front bytes. + They will never be accessed anyway because + prev_inuse of av->top (and any chunk created from its start) + is always true after initialization. + */ + + correction = MALLOC_ALIGNMENT - front_misalign; + aligned_brk += correction; + } + + /* + If this isn't adjacent to existing space, then we will not + be able to merge with old_top space, so must add to 2nd request. + */ + + correction += old_size; + + /* Extend the end address to hit a page boundary */ + end_misalign = (INTERNAL_SIZE_T)(brk + size + correction); + correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign; + + assert(correction >= 0); + snd_brk = (char*)(MORECORE(correction)); + + if (snd_brk == (char*)(MORECORE_FAILURE)) { + /* + If can't allocate correction, try to at least find out current + brk. It might be enough to proceed without failing. + */ + correction = 0; + snd_brk = (char*)(MORECORE(0)); + } + else if (snd_brk < brk) { + /* + If the second call gives noncontiguous space even though + it says it won't, the only course of action is to ignore + results of second call, and conservatively estimate where + the first call left us. Also set noncontiguous, so this + won't happen again, leaving at most one hole. + + Note that this check is intrinsically incomplete. Because + MORECORE is allowed to give more space than we ask for, + there is no reliable way to detect a noncontiguity + producing a forward gap for the second call. + */ + snd_brk = brk + size; + correction = 0; + set_noncontiguous(av); + } + + } + + /* handle non-contiguous cases */ + else { + /* MORECORE/mmap must correctly align */ + assert(aligned_OK(chunk2mem(brk))); + + /* Find out current end of memory */ + if (snd_brk == (char*)(MORECORE_FAILURE)) { + snd_brk = (char*)(MORECORE(0)); + av->sbrked_mem += snd_brk - brk - size; + } + } + + /* Adjust top based on results of second sbrk */ + if (snd_brk != (char*)(MORECORE_FAILURE)) { + av->top = (mchunkptr)aligned_brk; + set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE); + av->sbrked_mem += correction; + + /* + If not the first time through, we either have a + gap due to foreign sbrk or a non-contiguous region. Insert a + double fencepost at old_top to prevent consolidation with space + we don't own. These fenceposts are artificial chunks that are + marked as inuse and are in any case too small to use. We need + two to make sizes and alignments work out. + */ + + if (old_size != 0) { + /* + Shrink old_top to insert fenceposts, keeping size a + multiple of MALLOC_ALIGNMENT. We know there is at least + enough space in old_top to do this. + */ + old_size = (old_size - 3*SIZE_SZ) & ~MALLOC_ALIGN_MASK; + set_head(old_top, old_size | PREV_INUSE); + + /* + Note that the following assignments completely overwrite + old_top when old_size was previously MINSIZE. This is + intentional. We need the fencepost, even if old_top otherwise gets + lost. + */ + chunk_at_offset(old_top, old_size )->size = + SIZE_SZ|PREV_INUSE; + + chunk_at_offset(old_top, old_size + SIZE_SZ)->size = + SIZE_SZ|PREV_INUSE; + + /* + If possible, release the rest, suppressing trimming. + */ + if (old_size >= MINSIZE) { + INTERNAL_SIZE_T tt = av->trim_threshold; + av->trim_threshold = (INTERNAL_SIZE_T)(-1); + fREe(chunk2mem(old_top)); + av->trim_threshold = tt; + } + } + } + } + + /* Update statistics */ + sum = av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem)) + av->max_sbrked_mem = sum; + + sum += av->mmapped_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) + av->max_total_mem = sum; + + check_malloc_state(); + + /* finally, do the allocation */ + + p = av->top; + size = chunksize(p); + + /* check that one of the above allocation paths succeeded */ + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + av->top = remainder; + set_head(p, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + check_malloced_chunk(p, nb); + return chunk2mem(p); + } + + } + + /* catch all failure paths */ + MALLOC_FAILURE_ACTION; + return 0; +} + + + + +/* + sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back + to the system (via negative arguments to sbrk) if there is unused + memory at the `high' end of the malloc pool. It is called + automatically by free() when top space exceeds the trim + threshold. It is also called by the public malloc_trim routine. It + returns 1 if it actually released any memory, else 0. +*/ + +#if __STD_C +static int sYSTRIm(size_t pad, mstate av) +#else +static int sYSTRIm(pad, av) size_t pad; mstate av; +#endif +{ + long top_size; /* Amount of top-most memory */ + long extra; /* Amount to release */ + long released; /* Amount actually released */ + char* current_brk; /* address returned by pre-check sbrk call */ + char* new_brk; /* address returned by post-check sbrk call */ + size_t pagesz; + + pagesz = av->pagesize; + top_size = chunksize(av->top); + + /* Release in pagesize units, keeping at least one page */ + extra = ((top_size - pad - MINSIZE + (pagesz-1)) / pagesz - 1) * pagesz; + + if (extra > 0) { + + /* + Only proceed if end of memory is where we last set it. + This avoids problems if there were foreign sbrk calls. + */ + current_brk = (char*)(MORECORE(0)); + if (current_brk == (char*)(av->top) + top_size) { + + /* + Attempt to release memory. We ignore MORECORE return value, + and instead call again to find out where new end of memory is. + This avoids problems if first call releases less than we asked, + of if failure somehow altered brk value. (We could still + encounter problems if it altered brk in some very bad way, + but the only thing we can do is adjust anyway, which will cause + some downstream failure.) + */ + + MORECORE(-extra); + new_brk = (char*)(MORECORE(0)); + + if (new_brk != (char*)MORECORE_FAILURE) { + released = (long)(current_brk - new_brk); + + if (released != 0) { + /* Success. Adjust top. */ + av->sbrked_mem -= released; + set_head(av->top, (top_size - released) | PREV_INUSE); + check_malloc_state(); + return 1; + } + } + } + } + return 0; +} + +/* + ------------------------------ malloc ------------------------------ +*/ + + +#if __STD_C +Void_t* mALLOc(size_t bytes) +#else + Void_t* mALLOc(bytes) size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T nb; /* normalized request size */ + unsigned int idx; /* associated bin index */ + mbinptr bin; /* associated bin */ + mfastbinptr* fb; /* associated fastbin */ + + mchunkptr victim; /* inspected/selected chunk */ + INTERNAL_SIZE_T size; /* its size */ + int victim_index; /* its bin index */ + + mchunkptr remainder; /* remainder from a split */ + CHUNK_SIZE_T remainder_size; /* its size */ + + unsigned int block; /* bit map traverser */ + unsigned int bit; /* bit map traverser */ + unsigned int map; /* current word of binmap */ + + mchunkptr fwd; /* misc temp for linking */ + mchunkptr bck; /* misc temp for linking */ + + /* + Convert request size to internal form by adding SIZE_SZ bytes + overhead plus possibly more to obtain necessary alignment and/or + to obtain a size of at least MINSIZE, the smallest allocatable + size. Also, checked_request2size traps (returning 0) request sizes + that are so large that they wrap around zero when padded and + aligned. + */ + + checked_request2size(bytes, nb); + + /* + Bypass search if no frees yet + */ + if (!have_anychunks(av)) { + if (av->max_fast == 0) /* initialization check */ + malloc_consolidate(av); + goto use_top; + } + + /* + If the size qualifies as a fastbin, first check corresponding bin. + */ + + if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) { + fb = &(av->fastbins[(fastbin_index(nb))]); + if ( (victim = *fb) != 0) { + *fb = victim->fd; + check_remalloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + + /* + If a small request, check regular bin. Since these "smallbins" + hold one size each, no searching within bins is necessary. + (For a large request, we need to wait until unsorted chunks are + processed to find best fit. But for small ones, fits are exact + anyway, so we can check now, which is faster.) + */ + + if (in_smallbin_range(nb)) { + idx = smallbin_index(nb); + bin = bin_at(av,idx); + + if ( (victim = last(bin)) != bin) { + bck = victim->bk; + set_inuse_bit_at_offset(victim, nb); + bin->bk = bck; + bck->fd = bin; + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + + /* + If this is a large request, consolidate fastbins before continuing. + While it might look excessive to kill all fastbins before + even seeing if there is space available, this avoids + fragmentation problems normally associated with fastbins. + Also, in practice, programs tend to have runs of either small or + large requests, but less often mixtures, so consolidation is not + invoked all that often in most programs. And the programs that + it is called frequently in otherwise tend to fragment. + */ + + else { + idx = largebin_index(nb); + if (have_fastchunks(av)) + malloc_consolidate(av); + } + + /* + Process recently freed or remaindered chunks, taking one only if + it is exact fit, or, if this a small request, the chunk is remainder from + the most recent non-exact fit. Place other traversed chunks in + bins. Note that this step is the only place in any routine where + chunks are placed in bins. + */ + + while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { + bck = victim->bk; + size = chunksize(victim); + + /* + If a small request, try to use last remainder if it is the + only chunk in unsorted bin. This helps promote locality for + runs of consecutive small requests. This is the only + exception to best-fit, and applies only when there is + no exact fit for a small chunk. + */ + + if (in_smallbin_range(nb) && + bck == unsorted_chunks(av) && + victim == av->last_remainder && + (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { + + /* split and reattach remainder */ + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + av->last_remainder = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* remove from unsorted list */ + unsorted_chunks(av)->bk = bck; + bck->fd = unsorted_chunks(av); + + /* Take now instead of binning if exact fit */ + + if (size == nb) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* place chunk in bin */ + + if (in_smallbin_range(size)) { + victim_index = smallbin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + } + else { + victim_index = largebin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + + if (fwd != bck) { + /* if smaller than smallest, place first */ + if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) { + fwd = bck; + bck = bck->bk; + } + else if ((CHUNK_SIZE_T)(size) >= + (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) { + + /* maintain large bins in sorted order */ + size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ + while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) + fwd = fwd->fd; + bck = fwd->bk; + } + } + } + + mark_bin(av, victim_index); + victim->bk = bck; + victim->fd = fwd; + fwd->bk = victim; + bck->fd = victim; + } + + /* + If a large request, scan through the chunks of current bin to + find one that fits. (This will be the smallest that fits unless + FIRST_SORTED_BIN_SIZE has been changed from default.) This is + the only step where an unbounded number of chunks might be + scanned without doing anything useful with them. However the + lists tend to be short. + */ + + if (!in_smallbin_range(nb)) { + bin = bin_at(av, idx); + + for (victim = last(bin); victim != bin; victim = victim->bk) { + size = chunksize(victim); + + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) { + remainder_size = size - nb; + unlink(victim, bck, fwd); + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + } + } + + /* + Search for a chunk by scanning bins, starting with next largest + bin. This search is strictly by best-fit; i.e., the smallest + (with ties going to approximately the least recently used) chunk + that fits is selected. + + The bitmap avoids needing to check that most blocks are nonempty. + */ + + ++idx; + bin = bin_at(av,idx); + block = idx2block(idx); + map = av->binmap[block]; + bit = idx2bit(idx); + + for (;;) { + + /* Skip rest of block if there are no more set bits in this block. */ + if (bit > map || bit == 0) { + do { + if (++block >= BINMAPSIZE) /* out of bins */ + goto use_top; + } while ( (map = av->binmap[block]) == 0); + + bin = bin_at(av, (block << BINMAPSHIFT)); + bit = 1; + } + + /* Advance to bin with set bit. There must be one. */ + while ((bit & map) == 0) { + bin = next_bin(bin); + bit <<= 1; + assert(bit != 0); + } + + /* Inspect the bin. It is likely to be non-empty */ + victim = last(bin); + + /* If a false alarm (empty bin), clear the bit. */ + if (victim == bin) { + av->binmap[block] = map &= ~bit; /* Write through */ + bin = next_bin(bin); + bit <<= 1; + } + + else { + size = chunksize(victim); + + /* We know the first chunk in this bin is big enough to use. */ + assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)); + + remainder_size = size - nb; + + /* unlink */ + bck = victim->bk; + bin->bk = bck; + bck->fd = bin; + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + /* advertise as last remainder */ + if (in_smallbin_range(nb)) + av->last_remainder = remainder; + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + } + } + + use_top: + /* + If large enough, split off the chunk bordering the end of memory + (held in av->top). Note that this is in accord with the best-fit + search rule. In effect, av->top is treated as larger (and thus + less well fitting) than any other available chunk since it can + be extended to be as large as necessary (up to system + limitations). + + We require that av->top always exists (i.e., has size >= + MINSIZE) after initialization, so if it would otherwise be + exhuasted by current request, it is replenished. (The main + reason for ensuring it exists is that we may need MINSIZE space + to put in fenceposts in sysmalloc.) + */ + + victim = av->top; + size = chunksize(victim); + + if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + av->top = remainder; + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + + check_malloced_chunk(victim, nb); + return chunk2mem(victim); + } + + /* + If no space in top, relay to handle system-dependent cases + */ + return sYSMALLOc(nb, av); +} + +/* + ------------------------------ free ------------------------------ +*/ + +#if __STD_C +void fREe(Void_t* mem) +#else +void fREe(mem) Void_t* mem; +#endif +{ + mstate av = get_malloc_state(); + + mchunkptr p; /* chunk corresponding to mem */ + INTERNAL_SIZE_T size; /* its size */ + mfastbinptr* fb; /* associated fastbin */ + mchunkptr nextchunk; /* next contiguous chunk */ + INTERNAL_SIZE_T nextsize; /* its size */ + int nextinuse; /* true if nextchunk is used */ + INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + /* free(0) has no effect */ + if (mem != 0) { + p = mem2chunk(mem); + size = chunksize(p); + + check_inuse_chunk(p); + + /* + If eligible, place chunk on a fastbin so it can be found + and used quickly in malloc. + */ + + if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast) + +#if TRIM_FASTBINS + /* + If TRIM_FASTBINS set, don't place chunks + bordering top into fastbins + */ + && (chunk_at_offset(p, size) != av->top) +#endif + ) { + + set_fastchunks(av); + fb = &(av->fastbins[fastbin_index(size)]); + p->fd = *fb; + *fb = p; + } + + /* + Consolidate other non-mmapped chunks as they arrive. + */ + + else if (!chunk_is_mmapped(p)) { + set_anychunks(av); + + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + /* consolidate backward */ + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long) prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + /* get and clear inuse bit */ + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + set_head(nextchunk, nextsize); + + /* consolidate forward */ + if (!nextinuse) { + unlink(nextchunk, bck, fwd); + size += nextsize; + } + + /* + Place the chunk in unsorted chunk list. Chunks are + not placed into regular bins until after they have + been given one chance to be used in malloc. + */ + + bck = unsorted_chunks(av); + fwd = bck->fd; + p->bk = bck; + p->fd = fwd; + bck->fd = p; + fwd->bk = p; + + set_head(p, size | PREV_INUSE); + set_foot(p, size); + + check_free_chunk(p); + } + + /* + If the chunk borders the current high end of memory, + consolidate into top + */ + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + check_chunk(p); + } + + /* + If freeing a large space, consolidate possibly-surrounding + chunks. Then, if the total unused topmost memory exceeds trim + threshold, ask malloc_trim to reduce top. + + Unless max_fast is 0, we don't know if there are fastbins + bordering top, so we cannot tell for sure whether threshold + has been reached unless fastbins are consolidated. But we + don't want to consolidate on each free. As a compromise, + consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD + is reached. + */ + + if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { + if (have_fastchunks(av)) + malloc_consolidate(av); + +#ifndef MORECORE_CANNOT_TRIM + if ((CHUNK_SIZE_T)(chunksize(av->top)) >= + (CHUNK_SIZE_T)(av->trim_threshold)) + sYSTRIm(av->top_pad, av); +#endif + } + + } + /* + If the chunk was allocated via mmap, release via munmap() + Note that if HAVE_MMAP is false but chunk_is_mmapped is + true, then user must have overwritten memory. There's nothing + we can do to catch this error unless DEBUG is set, in which case + check_inuse_chunk (above) will have triggered error. + */ + + else { +#if HAVE_MMAP + int ret; + INTERNAL_SIZE_T offset = p->prev_size; + av->n_mmaps--; + av->mmapped_mem -= (size + offset); + ret = munmap((char*)p - offset, size + offset); + /* munmap returns non-zero on failure */ + assert(ret == 0); +#endif + } + } +} + +/* + ------------------------- malloc_consolidate ------------------------- + + malloc_consolidate is a specialized version of free() that tears + down chunks held in fastbins. Free itself cannot be used for this + purpose since, among other things, it might place chunks back onto + fastbins. So, instead, we need to use a minor variant of the same + code. + + Also, because this routine needs to be called the first time through + malloc anyway, it turns out to be the perfect place to trigger + initialization code. +*/ + +#if __STD_C +static void malloc_consolidate(mstate av) +#else +static void malloc_consolidate(av) mstate av; +#endif +{ + mfastbinptr* fb; /* current fastbin being consolidated */ + mfastbinptr* maxfb; /* last fastbin (for loop control) */ + mchunkptr p; /* current chunk being consolidated */ + mchunkptr nextp; /* next chunk to consolidate */ + mchunkptr unsorted_bin; /* bin header */ + mchunkptr first_unsorted; /* chunk to link to */ + + /* These have same use as in free() */ + mchunkptr nextchunk; + INTERNAL_SIZE_T size; + INTERNAL_SIZE_T nextsize; + INTERNAL_SIZE_T prevsize; + int nextinuse; + mchunkptr bck; + mchunkptr fwd; + + /* + If max_fast is 0, we know that av hasn't + yet been initialized, in which case do so below + */ + + if (av->max_fast != 0) { + clear_fastchunks(av); + + unsorted_bin = unsorted_chunks(av); + + /* + Remove each chunk from fast bin and consolidate it, placing it + then in unsorted bin. Among other reasons for doing this, + placing in unsorted bin avoids needing to calculate actual bins + until malloc is sure that chunks aren't immediately going to be + reused anyway. + */ + + maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); + fb = &(av->fastbins[0]); + do { + if ( (p = *fb) != 0) { + *fb = 0; + + do { + check_inuse_chunk(p); + nextp = p->fd; + + /* Slightly streamlined version of consolidation code in free() */ + size = p->size & ~PREV_INUSE; + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long) prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + set_head(nextchunk, nextsize); + + if (!nextinuse) { + size += nextsize; + unlink(nextchunk, bck, fwd); + } + + first_unsorted = unsorted_bin->fd; + unsorted_bin->fd = p; + first_unsorted->bk = p; + + set_head(p, size | PREV_INUSE); + p->bk = unsorted_bin; + p->fd = first_unsorted; + set_foot(p, size); + } + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + } + + } while ( (p = nextp) != 0); + + } + } while (fb++ != maxfb); + } + else { + malloc_init_state(av); + check_malloc_state(); + } +} + +/* + ------------------------------ realloc ------------------------------ +*/ + + +#if __STD_C +Void_t* rEALLOc(Void_t* oldmem, size_t bytes) +#else +Void_t* rEALLOc(oldmem, bytes) Void_t* oldmem; size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + + INTERNAL_SIZE_T nb; /* padded request size */ + + mchunkptr oldp; /* chunk corresponding to oldmem */ + INTERNAL_SIZE_T oldsize; /* its size */ + + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + Void_t* newmem; /* corresponding user mem */ + + mchunkptr next; /* next contiguous chunk after oldp */ + + mchunkptr remainder; /* extra space at end of newp */ + CHUNK_SIZE_T remainder_size; /* its size */ + + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + CHUNK_SIZE_T copysize; /* bytes to copy */ + unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ + INTERNAL_SIZE_T* s; /* copy source */ + INTERNAL_SIZE_T* d; /* copy destination */ + + +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + fREe(oldmem); + return 0; + } +#endif + + /* realloc of null is supposed to be same as malloc */ + if (oldmem == 0) return mALLOc(bytes); + + checked_request2size(bytes, nb); + + oldp = mem2chunk(oldmem); + oldsize = chunksize(oldp); + + check_inuse_chunk(oldp); + + if (!chunk_is_mmapped(oldp)) { + + if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) { + /* already big enough; split below */ + newp = oldp; + newsize = oldsize; + } + + else { + next = chunk_at_offset(oldp, oldsize); + + /* Try to expand forward into top */ + if (next == av->top && + (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= + (CHUNK_SIZE_T)(nb + MINSIZE)) { + set_head_size(oldp, nb); + av->top = chunk_at_offset(oldp, nb); + set_head(av->top, (newsize - nb) | PREV_INUSE); + return chunk2mem(oldp); + } + + /* Try to expand forward into next chunk; split off remainder below */ + else if (next != av->top && + !inuse(next) && + (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >= + (CHUNK_SIZE_T)(nb)) { + newp = oldp; + unlink(next, bck, fwd); + } + + /* allocate, copy, free */ + else { + newmem = mALLOc(nb - MALLOC_ALIGN_MASK); + if (newmem == 0) + return 0; /* propagate failure */ + + newp = mem2chunk(newmem); + newsize = chunksize(newp); + + /* + Avoid copy if newp is next chunk after oldp. + */ + if (newp == next) { + newsize += oldsize; + newp = oldp; + } + else { + /* + Unroll copy of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + copysize = oldsize - SIZE_SZ; + s = (INTERNAL_SIZE_T*)(oldmem); + d = (INTERNAL_SIZE_T*)(newmem); + ncopies = copysize / sizeof(INTERNAL_SIZE_T); + assert(ncopies >= 3); + + if (ncopies > 9) + MALLOC_COPY(d, s, copysize); + + else { + *(d+0) = *(s+0); + *(d+1) = *(s+1); + *(d+2) = *(s+2); + if (ncopies > 4) { + *(d+3) = *(s+3); + *(d+4) = *(s+4); + if (ncopies > 6) { + *(d+5) = *(s+5); + *(d+6) = *(s+6); + if (ncopies > 8) { + *(d+7) = *(s+7); + *(d+8) = *(s+8); + } + } + } + } + + fREe(oldmem); + check_inuse_chunk(newp); + return chunk2mem(newp); + } + } + } + + /* If possible, free extra space in old or extended chunk */ + + assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb)); + + remainder_size = newsize - nb; + + if (remainder_size < MINSIZE) { /* not enough extra to split off */ + set_head_size(newp, newsize); + set_inuse_bit_at_offset(newp, newsize); + } + else { /* split remainder */ + remainder = chunk_at_offset(newp, nb); + set_head_size(newp, nb); + set_head(remainder, remainder_size | PREV_INUSE); + /* Mark remainder as inuse so free() won't complain */ + set_inuse_bit_at_offset(remainder, remainder_size); + fREe(chunk2mem(remainder)); + } + + check_inuse_chunk(newp); + return chunk2mem(newp); + } + + /* + Handle mmap cases + */ + + else { +#if HAVE_MMAP + +#if HAVE_MREMAP + INTERNAL_SIZE_T offset = oldp->prev_size; + size_t pagemask = av->pagesize - 1; + char *cp; + CHUNK_SIZE_T sum; + + /* Note the extra SIZE_SZ overhead */ + newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; + + /* don't need to remap if still within same page */ + if (oldsize == newsize - offset) + return oldmem; + + cp = (char*)mremap((char*)oldp - offset, oldsize + offset, newsize, 1); + + if (cp != (char*)MORECORE_FAILURE) { + + newp = (mchunkptr)(cp + offset); + set_head(newp, (newsize - offset)|IS_MMAPPED); + + assert(aligned_OK(chunk2mem(newp))); + assert((newp->prev_size == offset)); + + /* update statistics */ + sum = av->mmapped_mem += newsize - oldsize; + if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) + av->max_mmapped_mem = sum; + sum += av->sbrked_mem; + if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) + av->max_total_mem = sum; + + return chunk2mem(newp); + } +#endif + + /* Note the extra SIZE_SZ overhead. */ + if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) + newmem = oldmem; /* do nothing */ + else { + /* Must alloc, copy, free. */ + newmem = mALLOc(nb - MALLOC_ALIGN_MASK); + if (newmem != 0) { + MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ); + fREe(oldmem); + } + } + return newmem; + +#else + /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ + check_malloc_state(); + MALLOC_FAILURE_ACTION; + return 0; +#endif + } +} + +/* + ------------------------------ memalign ------------------------------ +*/ + +#if __STD_C +Void_t* mEMALIGn(size_t alignment, size_t bytes) +#else +Void_t* mEMALIGn(alignment, bytes) size_t alignment; size_t bytes; +#endif +{ + INTERNAL_SIZE_T nb; /* padded request size */ + char* m; /* memory returned by malloc call */ + mchunkptr p; /* corresponding chunk */ + char* brk; /* alignment point within p */ + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ + mchunkptr remainder; /* spare room at end to split off */ + CHUNK_SIZE_T remainder_size; /* its size */ + INTERNAL_SIZE_T size; + + /* If need less alignment than we give anyway, just relay to malloc */ + + if (alignment <= MALLOC_ALIGNMENT) return mALLOc(bytes); + + /* Otherwise, ensure that it is at least a minimum chunk size */ + + if (alignment < MINSIZE) alignment = MINSIZE; + + /* Make sure alignment is power of 2 (in case MINSIZE is not). */ + if ((alignment & (alignment - 1)) != 0) { + size_t a = MALLOC_ALIGNMENT * 2; + while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) a <<= 1; + alignment = a; + } + + checked_request2size(bytes, nb); + + /* + Strategy: find a spot within that chunk that meets the alignment + request, and then possibly free the leading and trailing space. + */ + + + /* Call malloc with worst case padding to hit alignment. */ + + m = (char*)(mALLOc(nb + alignment + MINSIZE)); + + if (m == 0) return 0; /* propagate failure */ + + p = mem2chunk(m); + + if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */ + + /* + Find an aligned spot inside chunk. Since we need to give back + leading space in a chunk of at least MINSIZE, if the first + calculation places us at a spot with less than MINSIZE leader, + we can move to the next aligned spot -- we've allocated enough + total room so that this is always possible. + */ + + brk = (char*)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) & + -((signed long) alignment))); + if ((CHUNK_SIZE_T)(brk - (char*)(p)) < MINSIZE) + brk += alignment; + + newp = (mchunkptr)brk; + leadsize = brk - (char*)(p); + newsize = chunksize(p) - leadsize; + + /* For mmapped chunks, just adjust offset */ + if (chunk_is_mmapped(p)) { + newp->prev_size = p->prev_size + leadsize; + set_head(newp, newsize|IS_MMAPPED); + return chunk2mem(newp); + } + + /* Otherwise, give back leader, use the rest */ + set_head(newp, newsize | PREV_INUSE); + set_inuse_bit_at_offset(newp, newsize); + set_head_size(p, leadsize); + fREe(chunk2mem(p)); + p = newp; + + assert (newsize >= nb && + (((PTR_UINT)(chunk2mem(p))) % alignment) == 0); + } + + /* Also give back spare room at the end */ + if (!chunk_is_mmapped(p)) { + size = chunksize(p); + if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + set_head(remainder, remainder_size | PREV_INUSE); + set_head_size(p, nb); + fREe(chunk2mem(remainder)); + } + } + + check_inuse_chunk(p); + return chunk2mem(p); +} + +/* + ------------------------------ calloc ------------------------------ +*/ + +#if __STD_C +Void_t* cALLOc(size_t n_elements, size_t elem_size) +#else +Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size; +#endif +{ + mchunkptr p; + CHUNK_SIZE_T clearsize; + CHUNK_SIZE_T nclears; + INTERNAL_SIZE_T* d; + + Void_t* mem = mALLOc(n_elements * elem_size); + + if (mem != 0) { + p = mem2chunk(mem); + + if (!chunk_is_mmapped(p)) + { + /* + Unroll clear of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + d = (INTERNAL_SIZE_T*)mem; + clearsize = chunksize(p) - SIZE_SZ; + nclears = clearsize / sizeof(INTERNAL_SIZE_T); + assert(nclears >= 3); + + if (nclears > 9) + MALLOC_ZERO(d, clearsize); + + else { + *(d+0) = 0; + *(d+1) = 0; + *(d+2) = 0; + if (nclears > 4) { + *(d+3) = 0; + *(d+4) = 0; + if (nclears > 6) { + *(d+5) = 0; + *(d+6) = 0; + if (nclears > 8) { + *(d+7) = 0; + *(d+8) = 0; + } + } + } + } + } +#if ! MMAP_CLEARS + else + { + d = (INTERNAL_SIZE_T*)mem; + /* + Note the additional SIZE_SZ + */ + clearsize = chunksize(p) - 2*SIZE_SZ; + MALLOC_ZERO(d, clearsize); + } +#endif + } + return mem; +} + +/* + ------------------------------ cfree ------------------------------ +*/ + +#if __STD_C +void cFREe(Void_t *mem) +#else +void cFREe(mem) Void_t *mem; +#endif +{ + fREe(mem); +} + +/* + ------------------------- independent_calloc ------------------------- +*/ + +#if __STD_C +Void_t** iCALLOc(size_t n_elements, size_t elem_size, Void_t* chunks[]) +#else +Void_t** iCALLOc(n_elements, elem_size, chunks) size_t n_elements; size_t elem_size; Void_t* chunks[]; +#endif +{ + size_t sz = elem_size; /* serves as 1-element array */ + /* opts arg of 3 means all elements are same size, and should be cleared */ + return iALLOc(n_elements, &sz, 3, chunks); +} + +/* + ------------------------- independent_comalloc ------------------------- +*/ + +#if __STD_C +Void_t** iCOMALLOc(size_t n_elements, size_t sizes[], Void_t* chunks[]) +#else +Void_t** iCOMALLOc(n_elements, sizes, chunks) size_t n_elements; size_t sizes[]; Void_t* chunks[]; +#endif +{ + return iALLOc(n_elements, sizes, 0, chunks); +} + + +/* + ------------------------------ ialloc ------------------------------ + ialloc provides common support for independent_X routines, handling all of + the combinations that can result. + + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ + + +#if __STD_C +static Void_t** iALLOc(size_t n_elements, + size_t* sizes, + int opts, + Void_t* chunks[]) +#else +static Void_t** iALLOc(n_elements, sizes, opts, chunks) size_t n_elements; size_t* sizes; int opts; Void_t* chunks[]; +#endif +{ + mstate av = get_malloc_state(); + INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ + INTERNAL_SIZE_T contents_size; /* total size of elements */ + INTERNAL_SIZE_T array_size; /* request size of pointer array */ + Void_t* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ + Void_t** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + int mmx; /* to disable mmap */ + INTERNAL_SIZE_T size; + size_t i; + + /* Ensure initialization */ + if (av->max_fast == 0) malloc_consolidate(av); + + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (Void_t**) mALLOc(0); + marray = 0; + array_size = request2size(n_elements * (sizeof(Void_t*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + /* subtract out alignment bytes from total to minimize overallocation */ + size = contents_size + array_size - MALLOC_ALIGN_MASK; + + /* + Allocate the aggregate chunk. + But first disable mmap so malloc won't use it, since + we would not be able to later free/realloc space internal + to a segregated mmap region. + */ + mmx = av->n_mmaps_max; /* disable mmap */ + av->n_mmaps_max = 0; + mem = mALLOc(size); + av->n_mmaps_max = mmx; /* reset mmap */ + if (mem == 0) + return 0; + + p = mem2chunk(mem); + assert(!chunk_is_mmapped(p)); + remainder_size = chunksize(p); + + if (opts & 0x2) { /* optionally clear the elements */ + MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + array_chunk = chunk_at_offset(p, contents_size); + marray = (Void_t**) (chunk2mem(array_chunk)); + set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_head(p, size | PREV_INUSE); + p = chunk_at_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_head(p, remainder_size | PREV_INUSE); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) + assert(remainder_size == element_size); + else + assert(remainder_size == request2size(sizes[i])); + check_inuse_chunk(mem2chunk(marray)); + } + + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(mem2chunk(marray[i])); +#endif + + return marray; +} + + +/* + ------------------------------ valloc ------------------------------ +*/ + +#if __STD_C +Void_t* vALLOc(size_t bytes) +#else +Void_t* vALLOc(bytes) size_t bytes; +#endif +{ + /* Ensure initialization */ + mstate av = get_malloc_state(); + if (av->max_fast == 0) malloc_consolidate(av); + return mEMALIGn(av->pagesize, bytes); +} + +/* + ------------------------------ pvalloc ------------------------------ +*/ + + +#if __STD_C +Void_t* pVALLOc(size_t bytes) +#else +Void_t* pVALLOc(bytes) size_t bytes; +#endif +{ + mstate av = get_malloc_state(); + size_t pagesz; + + /* Ensure initialization */ + if (av->max_fast == 0) malloc_consolidate(av); + pagesz = av->pagesize; + return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); +} + + +/* + ------------------------------ malloc_trim ------------------------------ +*/ + +#if __STD_C +int mTRIm(size_t pad) +#else +int mTRIm(pad) size_t pad; +#endif +{ + mstate av = get_malloc_state(); + /* Ensure initialization/consolidation */ + malloc_consolidate(av); + +#ifndef MORECORE_CANNOT_TRIM + return sYSTRIm(pad, av); +#else + return 0; +#endif +} + + +/* + ------------------------- malloc_usable_size ------------------------- +*/ + +#if __STD_C +size_t mUSABLe(Void_t* mem) +#else +size_t mUSABLe(mem) Void_t* mem; +#endif +{ + mchunkptr p; + if (mem != 0) { + p = mem2chunk(mem); + if (chunk_is_mmapped(p)) + return chunksize(p) - 2*SIZE_SZ; + else if (inuse(p)) + return chunksize(p) - SIZE_SZ; + } + return 0; +} + +/* + ------------------------------ mallinfo ------------------------------ +*/ + +struct mallinfo mALLINFo() +{ + mstate av = get_malloc_state(); + struct mallinfo mi; + int i; + mbinptr b; + mchunkptr p; + INTERNAL_SIZE_T avail; + INTERNAL_SIZE_T fastavail; + int nblocks; + int nfastblocks; + + /* Ensure initialization */ + if (av->top == 0) malloc_consolidate(av); + + check_malloc_state(); + + /* Account for top */ + avail = chunksize(av->top); + nblocks = 1; /* top always exists */ + + /* traverse fastbins */ + nfastblocks = 0; + fastavail = 0; + + for (i = 0; i < NFASTBINS; ++i) { + for (p = av->fastbins[i]; p != 0; p = p->fd) { + ++nfastblocks; + fastavail += chunksize(p); + } + } + + avail += fastavail; + + /* traverse regular bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av, i); + for (p = last(b); p != b; p = p->bk) { + ++nblocks; + avail += chunksize(p); + } + } + + mi.smblks = nfastblocks; + mi.ordblks = nblocks; + mi.fordblks = avail; + mi.uordblks = av->sbrked_mem - avail; + mi.arena = av->sbrked_mem; + mi.hblks = av->n_mmaps; + mi.hblkhd = av->mmapped_mem; + mi.fsmblks = fastavail; + mi.keepcost = chunksize(av->top); + mi.usmblks = av->max_total_mem; + return mi; +} + +/* + ------------------------------ malloc_stats ------------------------------ +*/ + +void mSTATs() +{ + struct mallinfo mi = mALLINFo(); + +#ifdef WIN32 + { + CHUNK_SIZE_T free, reserved, committed; + vminfo (&free, &reserved, &committed); + fprintf(stderr, "free bytes = %10lu\n", + free); + fprintf(stderr, "reserved bytes = %10lu\n", + reserved); + fprintf(stderr, "committed bytes = %10lu\n", + committed); + } +#endif + +/* RN XXX */ + printf("max system bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.usmblks)); + printf("system bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.arena + mi.hblkhd)); + printf("in use bytes = %10lu\n", + (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd)); + +#ifdef WIN32 + { + CHUNK_SIZE_T kernel, user; + if (cpuinfo (TRUE, &kernel, &user)) { + fprintf(stderr, "kernel ms = %10lu\n", + kernel); + fprintf(stderr, "user ms = %10lu\n", + user); + } + } +#endif +} + + +/* + ------------------------------ mallopt ------------------------------ +*/ + +#if __STD_C +int mALLOPt(int param_number, int value) +#else +int mALLOPt(param_number, value) int param_number; int value; +#endif +{ + mstate av = get_malloc_state(); + /* Ensure initialization/consolidation */ + malloc_consolidate(av); + + switch(param_number) { + case M_MXFAST: + if (value >= 0 && value <= MAX_FAST_SIZE) { + set_max_fast(av, value); + return 1; + } + else + return 0; + + case M_TRIM_THRESHOLD: + av->trim_threshold = value; + return 1; + + case M_TOP_PAD: + av->top_pad = value; + return 1; + + case M_MMAP_THRESHOLD: + av->mmap_threshold = value; + return 1; + + case M_MMAP_MAX: +#if !HAVE_MMAP + if (value != 0) + return 0; +#endif + av->n_mmaps_max = value; + return 1; + + default: + return 0; + } +} + + +/* + -------------------- Alternative MORECORE functions -------------------- +*/ + + +/* + General Requirements for MORECORE. + + The MORECORE function must have the following properties: + + If MORECORE_CONTIGUOUS is false: + + * MORECORE must allocate in multiples of pagesize. It will + only be called with arguments that are multiples of pagesize. + + * MORECORE(0) must return an address that is at least + MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) + + else (i.e. If MORECORE_CONTIGUOUS is true): + + * Consecutive calls to MORECORE with positive arguments + return increasing addresses, indicating that space has been + contiguously extended. + + * MORECORE need not allocate in multiples of pagesize. + Calls to MORECORE need not have args of multiples of pagesize. + + * MORECORE need not page-align. + + In either case: + + * MORECORE may allocate more memory than requested. (Or even less, + but this will generally result in a malloc failure.) + + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. This malloc does NOT call MORECORE(0) + until at least one call with positive arguments is made, so + the initial value returned is not important. + + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + + * MORECORE need not handle negative arguments -- it may instead + just return MORECORE_FAILURE when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + There is some variation across systems about the type of the + argument to sbrk/MORECORE. If size_t is unsigned, then it cannot + actually be size_t, because sbrk supports negative args, so it is + normally the signed type of the same width as size_t (sometimes + declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much + matter though. Internally, we use "long" as arguments, which should + work across all reasonable possibilities. + + Additionally, if MORECORE ever returns failure for a positive + request, and HAVE_MMAP is true, then mmap is used as a noncontiguous + system allocator. This is a useful backup strategy for systems with + holes in address spaces -- in this case sbrk cannot contiguously + expand the heap, but mmap may be able to map noncontiguous space. + + If you'd like mmap to ALWAYS be used, you can define MORECORE to be + a function that always returns MORECORE_FAILURE. + + Malloc only has limited ability to detect failures of MORECORE + to supply contiguous space when it says it can. In particular, + multithreaded programs that do not use locks may result in + rece conditions across calls to MORECORE that result in gaps + that cannot be detected as such, and subsequent corruption. + + If you are using this malloc with something other than sbrk (or its + emulation) to supply memory regions, you probably want to set + MORECORE_CONTIGUOUS as false. As an example, here is a custom + allocator kindly contributed for pre-OSX macOS. It uses virtually + but not necessarily physically contiguous non-paged memory (locked + in, present and won't get swapped out). You can use it by + uncommenting this section, adding some #includes, and setting up the + appropriate defines above: + + #define MORECORE osMoreCore + #define MORECORE_CONTIGUOUS 0 + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. + + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MORECORE_FAILURE; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MORECORE_FAILURE; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* + -------------------------------------------------------------- + + Emulation of sbrk for win32. + Donated by J. Walter . + For additional information about this code, and malloc on Win32, see + http://www.genesys-e.de/jwalter/ +*/ + + +#ifdef WIN32 + +#ifdef _DEBUG +/* #define TRACE */ +#endif + +/* Support for USE_MALLOC_LOCK */ +#ifdef USE_MALLOC_LOCK + +/* Wait for spin lock */ +static int slwait (int *sl) { + while (InterlockedCompareExchange ((void **) sl, (void *) 1, (void *) 0) != 0) + Sleep (0); + return 0; +} + +/* Release spin lock */ +static int slrelease (int *sl) { + InterlockedExchange (sl, 0); + return 0; +} + +#ifdef NEEDED +/* Spin lock for emulation code */ +static int g_sl; +#endif + +#endif /* USE_MALLOC_LOCK */ + +/* getpagesize for windows */ +static long getpagesize (void) { + static long g_pagesize = 0; + if (! g_pagesize) { + SYSTEM_INFO system_info; + GetSystemInfo (&system_info); + g_pagesize = system_info.dwPageSize; + } + return g_pagesize; +} +static long getregionsize (void) { + static long g_regionsize = 0; + if (! g_regionsize) { + SYSTEM_INFO system_info; + GetSystemInfo (&system_info); + g_regionsize = system_info.dwAllocationGranularity; + } + return g_regionsize; +} + +/* A region list entry */ +typedef struct _region_list_entry { + void *top_allocated; + void *top_committed; + void *top_reserved; + long reserve_size; + struct _region_list_entry *previous; +} region_list_entry; + +/* Allocate and link a region entry in the region list */ +static int region_list_append (region_list_entry **last, void *base_reserved, long reserve_size) { + region_list_entry *next = HeapAlloc (GetProcessHeap (), 0, sizeof (region_list_entry)); + if (! next) + return FALSE; + next->top_allocated = (char *) base_reserved; + next->top_committed = (char *) base_reserved; + next->top_reserved = (char *) base_reserved + reserve_size; + next->reserve_size = reserve_size; + next->previous = *last; + *last = next; + return TRUE; +} +/* Free and unlink the last region entry from the region list */ +static int region_list_remove (region_list_entry **last) { + region_list_entry *previous = (*last)->previous; + if (! HeapFree (GetProcessHeap (), sizeof (region_list_entry), *last)) + return FALSE; + *last = previous; + return TRUE; +} + +#define CEIL(size,to) (((size)+(to)-1)&~((to)-1)) +#define FLOOR(size,to) ((size)&~((to)-1)) + +#define SBRK_SCALE 0 +/* #define SBRK_SCALE 1 */ +/* #define SBRK_SCALE 2 */ +/* #define SBRK_SCALE 4 */ + +/* sbrk for windows */ +static void *sbrk (long size) { + static long g_pagesize, g_my_pagesize; + static long g_regionsize, g_my_regionsize; + static region_list_entry *g_last; + void *result = (void *) MORECORE_FAILURE; +#ifdef TRACE + printf ("sbrk %d\n", size); +#endif +#if defined (USE_MALLOC_LOCK) && defined (NEEDED) + /* Wait for spin lock */ + slwait (&g_sl); +#endif + /* First time initialization */ + if (! g_pagesize) { + g_pagesize = getpagesize (); + g_my_pagesize = g_pagesize << SBRK_SCALE; + } + if (! g_regionsize) { + g_regionsize = getregionsize (); + g_my_regionsize = g_regionsize << SBRK_SCALE; + } + if (! g_last) { + if (! region_list_append (&g_last, 0, 0)) + goto sbrk_exit; + } + /* Assert invariants */ + assert (g_last); + assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_allocated && + g_last->top_allocated <= g_last->top_committed); + assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_committed && + g_last->top_committed <= g_last->top_reserved && + (unsigned) g_last->top_committed % g_pagesize == 0); + assert ((unsigned) g_last->top_reserved % g_regionsize == 0); + assert ((unsigned) g_last->reserve_size % g_regionsize == 0); + /* Allocation requested? */ + if (size >= 0) { + /* Allocation size is the requested size */ + long allocate_size = size; + /* Compute the size to commit */ + long to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed; + /* Do we reach the commit limit? */ + if (to_commit > 0) { + /* Round size to commit */ + long commit_size = CEIL (to_commit, g_my_pagesize); + /* Compute the size to reserve */ + long to_reserve = (char *) g_last->top_committed + commit_size - (char *) g_last->top_reserved; + /* Do we reach the reserve limit? */ + if (to_reserve > 0) { + /* Compute the remaining size to commit in the current region */ + long remaining_commit_size = (char *) g_last->top_reserved - (char *) g_last->top_committed; + if (remaining_commit_size > 0) { + /* Assert preconditions */ + assert ((unsigned) g_last->top_committed % g_pagesize == 0); + assert (0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); { + /* Commit this */ + void *base_committed = VirtualAlloc (g_last->top_committed, remaining_commit_size, + MEM_COMMIT, PAGE_READWRITE); + /* Check returned pointer for consistency */ + if (base_committed != g_last->top_committed) + goto sbrk_exit; + /* Assert postconditions */ + assert ((unsigned) base_committed % g_pagesize == 0); +#ifdef TRACE + printf ("Commit %p %d\n", base_committed, remaining_commit_size); +#endif + /* Adjust the regions commit top */ + g_last->top_committed = (char *) base_committed + remaining_commit_size; + } + } { + /* Now we are going to search and reserve. */ + int contiguous = -1; + int found = FALSE; + MEMORY_BASIC_INFORMATION memory_info; + void *base_reserved; + long reserve_size; + do { + /* Assume contiguous memory */ + contiguous = TRUE; + /* Round size to reserve */ + reserve_size = CEIL (to_reserve, g_my_regionsize); + /* Start with the current region's top */ + memory_info.BaseAddress = g_last->top_reserved; + /* Assert preconditions */ + assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0); + assert (0 < reserve_size && reserve_size % g_regionsize == 0); + while (VirtualQuery (memory_info.BaseAddress, &memory_info, sizeof (memory_info))) { + /* Assert postconditions */ + assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0); +#ifdef TRACE + printf ("Query %p %d %s\n", memory_info.BaseAddress, memory_info.RegionSize, + memory_info.State == MEM_FREE ? "FREE": + (memory_info.State == MEM_RESERVE ? "RESERVED": + (memory_info.State == MEM_COMMIT ? "COMMITTED": "?"))); +#endif + /* Region is free, well aligned and big enough: we are done */ + if (memory_info.State == MEM_FREE && + (unsigned) memory_info.BaseAddress % g_regionsize == 0 && + memory_info.RegionSize >= (unsigned) reserve_size) { + found = TRUE; + break; + } + /* From now on we can't get contiguous memory! */ + contiguous = FALSE; + /* Recompute size to reserve */ + reserve_size = CEIL (allocate_size, g_my_regionsize); + memory_info.BaseAddress = (char *) memory_info.BaseAddress + memory_info.RegionSize; + /* Assert preconditions */ + assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0); + assert (0 < reserve_size && reserve_size % g_regionsize == 0); + } + /* Search failed? */ + if (! found) + goto sbrk_exit; + /* Assert preconditions */ + assert ((unsigned) memory_info.BaseAddress % g_regionsize == 0); + assert (0 < reserve_size && reserve_size % g_regionsize == 0); + /* Try to reserve this */ + base_reserved = VirtualAlloc (memory_info.BaseAddress, reserve_size, + MEM_RESERVE, PAGE_NOACCESS); + if (! base_reserved) { + int rc = GetLastError (); + if (rc != ERROR_INVALID_ADDRESS) + goto sbrk_exit; + } + /* A null pointer signals (hopefully) a race condition with another thread. */ + /* In this case, we try again. */ + } while (! base_reserved); + /* Check returned pointer for consistency */ + if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress) + goto sbrk_exit; + /* Assert postconditions */ + assert ((unsigned) base_reserved % g_regionsize == 0); +#ifdef TRACE + printf ("Reserve %p %d\n", base_reserved, reserve_size); +#endif + /* Did we get contiguous memory? */ + if (contiguous) { + long start_size = (char *) g_last->top_committed - (char *) g_last->top_allocated; + /* Adjust allocation size */ + allocate_size -= start_size; + /* Adjust the regions allocation top */ + g_last->top_allocated = g_last->top_committed; + /* Recompute the size to commit */ + to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed; + /* Round size to commit */ + commit_size = CEIL (to_commit, g_my_pagesize); + } + /* Append the new region to the list */ + if (! region_list_append (&g_last, base_reserved, reserve_size)) + goto sbrk_exit; + /* Didn't we get contiguous memory? */ + if (! contiguous) { + /* Recompute the size to commit */ + to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed; + /* Round size to commit */ + commit_size = CEIL (to_commit, g_my_pagesize); + } + } + } + /* Assert preconditions */ + assert ((unsigned) g_last->top_committed % g_pagesize == 0); + assert (0 < commit_size && commit_size % g_pagesize == 0); { + /* Commit this */ + void *base_committed = VirtualAlloc (g_last->top_committed, commit_size, + MEM_COMMIT, PAGE_READWRITE); + /* Check returned pointer for consistency */ + if (base_committed != g_last->top_committed) + goto sbrk_exit; + /* Assert postconditions */ + assert ((unsigned) base_committed % g_pagesize == 0); +#ifdef TRACE + printf ("Commit %p %d\n", base_committed, commit_size); +#endif + /* Adjust the regions commit top */ + g_last->top_committed = (char *) base_committed + commit_size; + } + } + /* Adjust the regions allocation top */ + g_last->top_allocated = (char *) g_last->top_allocated + allocate_size; + result = (char *) g_last->top_allocated - size; + /* Deallocation requested? */ + } else if (size < 0) { + long deallocate_size = - size; + /* As long as we have a region to release */ + while ((char *) g_last->top_allocated - deallocate_size < (char *) g_last->top_reserved - g_last->reserve_size) { + /* Get the size to release */ + long release_size = g_last->reserve_size; + /* Get the base address */ + void *base_reserved = (char *) g_last->top_reserved - release_size; + /* Assert preconditions */ + assert ((unsigned) base_reserved % g_regionsize == 0); + assert (0 < release_size && release_size % g_regionsize == 0); { + /* Release this */ + int rc = VirtualFree (base_reserved, 0, + MEM_RELEASE); + /* Check returned code for consistency */ + if (! rc) + goto sbrk_exit; +#ifdef TRACE + printf ("Release %p %d\n", base_reserved, release_size); +#endif + } + /* Adjust deallocation size */ + deallocate_size -= (char *) g_last->top_allocated - (char *) base_reserved; + /* Remove the old region from the list */ + if (! region_list_remove (&g_last)) + goto sbrk_exit; + } { + /* Compute the size to decommit */ + long to_decommit = (char *) g_last->top_committed - ((char *) g_last->top_allocated - deallocate_size); + if (to_decommit >= g_my_pagesize) { + /* Compute the size to decommit */ + long decommit_size = FLOOR (to_decommit, g_my_pagesize); + /* Compute the base address */ + void *base_committed = (char *) g_last->top_committed - decommit_size; + /* Assert preconditions */ + assert ((unsigned) base_committed % g_pagesize == 0); + assert (0 < decommit_size && decommit_size % g_pagesize == 0); { + /* Decommit this */ + int rc = VirtualFree ((char *) base_committed, decommit_size, + MEM_DECOMMIT); + /* Check returned code for consistency */ + if (! rc) + goto sbrk_exit; +#ifdef TRACE + printf ("Decommit %p %d\n", base_committed, decommit_size); +#endif + } + /* Adjust deallocation size and regions commit and allocate top */ + deallocate_size -= (char *) g_last->top_allocated - (char *) base_committed; + g_last->top_committed = base_committed; + g_last->top_allocated = base_committed; + } + } + /* Adjust regions allocate top */ + g_last->top_allocated = (char *) g_last->top_allocated - deallocate_size; + /* Check for underflow */ + if ((char *) g_last->top_reserved - g_last->reserve_size > (char *) g_last->top_allocated || + g_last->top_allocated > g_last->top_committed) { + /* Adjust regions allocate top */ + g_last->top_allocated = (char *) g_last->top_reserved - g_last->reserve_size; + goto sbrk_exit; + } + result = g_last->top_allocated; + } + /* Assert invariants */ + assert (g_last); + assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_allocated && + g_last->top_allocated <= g_last->top_committed); + assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_committed && + g_last->top_committed <= g_last->top_reserved && + (unsigned) g_last->top_committed % g_pagesize == 0); + assert ((unsigned) g_last->top_reserved % g_regionsize == 0); + assert ((unsigned) g_last->reserve_size % g_regionsize == 0); + +sbrk_exit: +#if defined (USE_MALLOC_LOCK) && defined (NEEDED) + /* Release spin lock */ + slrelease (&g_sl); +#endif + return result; +} + +/* mmap for windows */ +static void *mmap (void *ptr, long size, long prot, long type, long handle, long arg) { + static long g_pagesize; + static long g_regionsize; +#ifdef TRACE + printf ("mmap %d\n", size); +#endif +#if defined (USE_MALLOC_LOCK) && defined (NEEDED) + /* Wait for spin lock */ + slwait (&g_sl); +#endif + /* First time initialization */ + if (! g_pagesize) + g_pagesize = getpagesize (); + if (! g_regionsize) + g_regionsize = getregionsize (); + /* Assert preconditions */ + assert ((unsigned) ptr % g_regionsize == 0); + assert (size % g_pagesize == 0); + /* Allocate this */ + ptr = VirtualAlloc (ptr, size, + MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE); + if (! ptr) { + ptr = (void *) MORECORE_FAILURE; + goto mmap_exit; + } + /* Assert postconditions */ + assert ((unsigned) ptr % g_regionsize == 0); +#ifdef TRACE + printf ("Commit %p %d\n", ptr, size); +#endif +mmap_exit: +#if defined (USE_MALLOC_LOCK) && defined (NEEDED) + /* Release spin lock */ + slrelease (&g_sl); +#endif + return ptr; +} + +/* munmap for windows */ +static long munmap (void *ptr, long size) { + static long g_pagesize; + static long g_regionsize; + int rc = MUNMAP_FAILURE; +#ifdef TRACE + printf ("munmap %p %d\n", ptr, size); +#endif +#if defined (USE_MALLOC_LOCK) && defined (NEEDED) + /* Wait for spin lock */ + slwait (&g_sl); +#endif + /* First time initialization */ + if (! g_pagesize) + g_pagesize = getpagesize (); + if (! g_regionsize) + g_regionsize = getregionsize (); + /* Assert preconditions */ + assert ((unsigned) ptr % g_regionsize == 0); + assert (size % g_pagesize == 0); + /* Free this */ + if (! VirtualFree (ptr, 0, + MEM_RELEASE)) + goto munmap_exit; + rc = 0; +#ifdef TRACE + printf ("Release %p %d\n", ptr, size); +#endif +munmap_exit: +#if defined (USE_MALLOC_LOCK) && defined (NEEDED) + /* Release spin lock */ + slrelease (&g_sl); +#endif + return rc; +} + +static void vminfo (CHUNK_SIZE_T *free, CHUNK_SIZE_T *reserved, CHUNK_SIZE_T *committed) { + MEMORY_BASIC_INFORMATION memory_info; + memory_info.BaseAddress = 0; + *free = *reserved = *committed = 0; + while (VirtualQuery (memory_info.BaseAddress, &memory_info, sizeof (memory_info))) { + switch (memory_info.State) { + case MEM_FREE: + *free += memory_info.RegionSize; + break; + case MEM_RESERVE: + *reserved += memory_info.RegionSize; + break; + case MEM_COMMIT: + *committed += memory_info.RegionSize; + break; + } + memory_info.BaseAddress = (char *) memory_info.BaseAddress + memory_info.RegionSize; + } +} + +static int cpuinfo (int whole, CHUNK_SIZE_T *kernel, CHUNK_SIZE_T *user) { + if (whole) { + __int64 creation64, exit64, kernel64, user64; + int rc = GetProcessTimes (GetCurrentProcess (), + (FILETIME *) &creation64, + (FILETIME *) &exit64, + (FILETIME *) &kernel64, + (FILETIME *) &user64); + if (! rc) { + *kernel = 0; + *user = 0; + return FALSE; + } + *kernel = (CHUNK_SIZE_T) (kernel64 / 10000); + *user = (CHUNK_SIZE_T) (user64 / 10000); + return TRUE; + } else { + __int64 creation64, exit64, kernel64, user64; + int rc = GetThreadTimes (GetCurrentThread (), + (FILETIME *) &creation64, + (FILETIME *) &exit64, + (FILETIME *) &kernel64, + (FILETIME *) &user64); + if (! rc) { + *kernel = 0; + *user = 0; + return FALSE; + } + *kernel = (CHUNK_SIZE_T) (kernel64 / 10000); + *user = (CHUNK_SIZE_T) (user64 / 10000); + return TRUE; + } +} + +#endif /* WIN32 */ + +/* ------------------------------------------------------------ +History: + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks To Emery Berger. + * Fix freeing of old top non-contiguous chunk im sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. + * new WIN32 sbrk, mmap, munmap, lock code from . + Thanks also to Andreas Mueller , + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sYSMALLOc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett and others. + * Include errno.h to support default failure action. + + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshhold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occuring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). + + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ diff --git a/mini-os/lib/math.c b/mini-os/lib/math.c new file mode 100644 index 0000000000..29e1cc933e --- /dev/null +++ b/mini-os/lib/math.c @@ -0,0 +1,385 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: math.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library functions for 64bit arith and other + * from freebsd, files in sys/libkern/ (qdivrem.c, etc) + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + *- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ +*/ + +#include + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + s64 q; /* as a (signed) quad */ + s64 uq; /* as an unsigned quad */ + long sl[2]; /* as two signed longs */ + unsigned long ul[2]; /* as two unsigned longs */ +}; +/* XXX RN: Yuck hardcoded endianess :) */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#define CHAR_BIT 8 /* number of bits in a char */ +#define QUAD_BITS (sizeof(s64) * CHAR_BIT) +#define LONG_BITS (sizeof(long) * CHAR_BIT) +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1 << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: use unsigned short if they fit */ +#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff +typedef unsigned short digit; +#else +typedef u_long digit; +#endif + + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void +shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_long. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u64 +__qdivrem(uq, vq, arq) + u64 uq, vq, *arq; +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + u_long qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + u_long rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + u_long nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + + +/* + * Divide two signed quads. + * ??? if -1/2 should produce -1 on this machine, this code is wrong + */ +s64 +__divdi3(s64 a, s64 b) +{ + u64 ua, ub, uq; + int neg; + + if (a < 0) + ua = -(u64)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(u64)b, neg ^= 1; + else + ub = b; + uq = __qdivrem(ua, ub, (u64 *)0); + return (neg ? -uq : uq); +} + +/* + * Divide two unsigned quads. + */ +u64 +__udivdi3(a, b) + u64 a, b; +{ + return (__qdivrem(a, b, (u64 *)0)); +} + + +/* + * Return remainder after dividing two unsigned quads. + */ +u_quad_t +__umoddi3(a, b) + u_quad_t a, b; +{ + u_quad_t r; + + (void)__qdivrem(a, b, &r); + return (r); +} + diff --git a/mini-os/lib/printf.c b/mini-os/lib/printf.c new file mode 100644 index 0000000000..bd7beba2c5 --- /dev/null +++ b/mini-os/lib/printf.c @@ -0,0 +1,470 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: printf.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library functions for printing + * (freebsd port, mainly sys/subr_prf.c) + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + * + *- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ + */ + +#include +#include +#include +#include + +/**************************************************************************** + * RN: printf family of routines + * taken mainly from sys/subr_prf.c + ****************************************************************************/ +char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +#define hex2ascii(hex) (hex2ascii_data[hex]) +#define NBBY 8 /* number of bits in a byte */ +#define MAXNBUF (sizeof(quad_t) * NBBY + 1) + +static int kvprintf(char const *fmt, void *arg, int radix, va_list ap); + + +int +printf(const char *fmt, ...) +{ + va_list ap; + int retval; + static char printk_buf[1024]; + + va_start(ap, fmt); + retval = kvprintf(fmt, printk_buf, 10, ap); + printk_buf[retval] = '\0'; + va_end(ap); + (void)HYPERVISOR_console_write(printk_buf, strlen(printk_buf)); + return retval; +} + +int +vprintf(const char *fmt, va_list ap) +{ + int retval; + static char printk_buf[1024]; + retval = kvprintf(fmt, printk_buf, 10, ap); + printk_buf[retval] = '\0'; + (void)HYPERVISOR_console_write(printk_buf, strlen(printk_buf)); + return retval; +} + +int +sprintf(char *buf, const char *cfmt, ...) +{ + int retval; + va_list ap; + + va_start(ap, cfmt); + retval = kvprintf(cfmt, (void *)buf, 10, ap); + buf[retval] = '\0'; + va_end(ap); + return retval; +} + +int +vsprintf(char *buf, const char *cfmt, va_list ap) +{ + int retval; + + retval = kvprintf(cfmt, (void *)buf, 10, ap); + buf[retval] = '\0'; + return retval; +} + + +/* + * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse + * order; return an optional length and a pointer to the last character + * written in the buffer (i.e., the first character of the string). + * The buffer pointed to by `nbuf' must have length >= MAXNBUF. + */ +static char * +ksprintn(nbuf, ul, base, lenp) + char *nbuf; + u_long ul; + int base, *lenp; +{ + char *p; + + p = nbuf; + *p = '\0'; + do { + *++p = hex2ascii(ul % base); + } while (ul /= base); + if (lenp) + *lenp = p - nbuf; + return (p); +} +/* ksprintn, but for a quad_t. */ +static char * +ksprintqn(nbuf, uq, base, lenp) + char *nbuf; + u_quad_t uq; + int base, *lenp; +{ + char *p; + + p = nbuf; + *p = '\0'; + do { + *++p = hex2ascii(uq % base); + } while (uq /= base); + if (lenp) + *lenp = p - nbuf; + return (p); +} + +/* + * Scaled down version of printf(3). + * + * Two additional formats: + * + * The format %b is supported to decode error registers. + * Its usage is: + * + * printf("reg=%b\n", regval, "*"); + * + * where is the output base expressed as a control character, e.g. + * \10 gives octal; \20 gives hex. Each arg is a sequence of characters, + * the first of which gives the bit number to be inspected (origin 1), and + * the next characters (up to a control character, i.e. a character <= 32), + * give the name of the register. Thus: + * + * kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n"); + * + * would produce output: + * + * reg=3 + * + * XXX: %D -- Hexdump, takes pointer and separator string: + * ("%6D", ptr, ":") -> XX:XX:XX:XX:XX:XX + * ("%*D", len, ptr, " " -> XX XX XX XX ... + */ + +/* RN: This normally takes a function for output. + * we always print to a string and the use HYPERCALL for write to console */ +static int +kvprintf(char const *fmt, void *arg, int radix, va_list ap) +{ + +#define PCHAR(c) {int cc=(c); *d++ = cc; retval++; } + + char nbuf[MAXNBUF]; + char *p, *q, *d; + u_char *up; + int ch, n; + u_long ul; + u_quad_t uq; + int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot; + int dwidth; + char padc; + int retval = 0; + + ul = 0; + uq = 0; + d = (char *) arg; + + if (fmt == NULL) + fmt = "(fmt null)\n"; + + if (radix < 2 || radix > 36) + radix = 10; + + for (;;) { + padc = ' '; + width = 0; + while ((ch = (u_char)*fmt++) != '%') { + if (ch == '\0') + return retval; + PCHAR(ch); + } + qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; + sign = 0; dot = 0; dwidth = 0; +reswitch: switch (ch = (u_char)*fmt++) { + case '.': + dot = 1; + goto reswitch; + case '#': + sharpflag = 1; + goto reswitch; + case '+': + sign = 1; + goto reswitch; + case '-': + ladjust = 1; + goto reswitch; + case '%': + PCHAR(ch); + break; + case '*': + if (!dot) { + width = va_arg(ap, int); + if (width < 0) { + ladjust = !ladjust; + width = -width; + } + } else { + dwidth = va_arg(ap, int); + } + goto reswitch; + case '0': + if (!dot) { + padc = '0'; + goto reswitch; + } + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + for (n = 0;; ++fmt) { + n = n * 10 + ch - '0'; + ch = *fmt; + if (ch < '0' || ch > '9') + break; + } + if (dot) + dwidth = n; + else + width = n; + goto reswitch; + case 'b': + ul = va_arg(ap, int); + p = va_arg(ap, char *); + for (q = ksprintn(nbuf, ul, *p++, NULL); *q;) + PCHAR(*q--); + + if (!ul) + break; + + for (tmp = 0; *p;) { + n = *p++; + if (ul & (1 << (n - 1))) { + PCHAR(tmp ? ',' : '<'); + for (; (n = *p) > ' '; ++p) + PCHAR(n); + tmp = 1; + } else + for (; *p > ' '; ++p) + continue; + } + if (tmp) + PCHAR('>'); + break; + case 'c': + PCHAR(va_arg(ap, int)); + break; + case 'D': + up = va_arg(ap, u_char *); + p = va_arg(ap, char *); + if (!width) + width = 16; + while(width--) { + PCHAR(hex2ascii(*up >> 4)); + PCHAR(hex2ascii(*up & 0x0f)); + up++; + if (width) + for (q=p;*q;q++) + PCHAR(*q); + } + break; + case 'd': + if (qflag) + uq = va_arg(ap, quad_t); + else if (lflag) + ul = va_arg(ap, long); + else + ul = va_arg(ap, int); + sign = 1; + base = 10; + goto number; + case 'l': + if (lflag) { + lflag = 0; + qflag = 1; + } else + lflag = 1; + goto reswitch; + case 'o': + if (qflag) + uq = va_arg(ap, u_quad_t); + else if (lflag) + ul = va_arg(ap, u_long); + else + ul = va_arg(ap, u_int); + base = 8; + goto nosign; + case 'p': + ul = (uintptr_t)va_arg(ap, void *); + base = 16; + sharpflag = (width == 0); + goto nosign; + case 'q': + qflag = 1; + goto reswitch; + case 'n': + case 'r': + if (qflag) + uq = va_arg(ap, u_quad_t); + else if (lflag) + ul = va_arg(ap, u_long); + else + ul = sign ? + (u_long)va_arg(ap, int) : va_arg(ap, u_int); + base = radix; + goto number; + case 's': + p = va_arg(ap, char *); + if (p == NULL) + p = "(null)"; + if (!dot) + n = strlen (p); + else + for (n = 0; n < dwidth && p[n]; n++) + continue; + + width -= n; + + if (!ladjust && width > 0) + while (width--) + PCHAR(padc); + while (n--) + PCHAR(*p++); + if (ladjust && width > 0) + while (width--) + PCHAR(padc); + break; + case 'u': + if (qflag) + uq = va_arg(ap, u_quad_t); + else if (lflag) + ul = va_arg(ap, u_long); + else + ul = va_arg(ap, u_int); + base = 10; + goto nosign; + case 'x': + case 'X': + if (qflag) + uq = va_arg(ap, u_quad_t); + else if (lflag) + ul = va_arg(ap, u_long); + else + ul = va_arg(ap, u_int); + base = 16; + goto nosign; + case 'z': + if (qflag) + uq = va_arg(ap, u_quad_t); + else if (lflag) + ul = va_arg(ap, u_long); + else + ul = sign ? + (u_long)va_arg(ap, int) : va_arg(ap, u_int); + base = 16; + goto number; +nosign: sign = 0; +number: + if (qflag) { + if (sign && (quad_t)uq < 0) { + neg = 1; + uq = -(quad_t)uq; + } + p = ksprintqn(nbuf, uq, base, &tmp); + } else { + if (sign && (long)ul < 0) { + neg = 1; + ul = -(long)ul; + } + p = ksprintn(nbuf, ul, base, &tmp); + } + if (sharpflag && (qflag ? uq != 0 : ul != 0)) { + if (base == 8) + tmp++; + else if (base == 16) + tmp += 2; + } + if (neg) + tmp++; + + if (!ladjust && width && (width -= tmp) > 0) + while (width--) + PCHAR(padc); + if (neg) + PCHAR('-'); + if (sharpflag && (qflag ? uq != 0 : ul != 0)) { + if (base == 8) { + PCHAR('0'); + } else if (base == 16) { + PCHAR('0'); + PCHAR('x'); + } + } + + while (*p) + PCHAR(*p--); + + if (ladjust && width && (width -= tmp) > 0) + while (width--) + PCHAR(padc); + + break; + default: + PCHAR('%'); + if (lflag) + PCHAR('l'); + PCHAR(ch); + break; + } + } +#undef PCHAR +} + diff --git a/mini-os/lib/string.c b/mini-os/lib/string.c new file mode 100644 index 0000000000..c1b9dbfc8e --- /dev/null +++ b/mini-os/lib/string.c @@ -0,0 +1,142 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: string.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library function for string and memory manipulation + * Origin unknown + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include +#include +#include + +int memcmp(const void * cs,const void * ct,size_t count) +{ + const unsigned char *su1, *su2; + signed char res = 0; + + for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +void * memcpy(void * dest,const void *src,size_t count) +{ + char *tmp = (char *) dest, *s = (char *) src; + + while (count--) + *tmp++ = *s++; + + return dest; +} + +int strncmp(const char * cs,const char * ct,size_t count) +{ + register signed char __res = 0; + + while (count) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + count--; + } + + return __res; +} + +int strcmp(const char * cs,const char * ct) +{ + register signed char __res; + + while (1) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + } + + return __res; +} + +char * strcpy(char * dest,const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +char * strncpy(char * dest,const char *src,size_t count) +{ + char *tmp = dest; + + while (count-- && (*dest++ = *src++) != '\0') + /* nothing */; + + return tmp; +} + +void * memset(void * s,int c,size_t count) +{ + char *xs = (char *) s; + + while (count--) + *xs++ = c; + + return s; +} + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +size_t strlen(const char * s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +char * strchr(const char * s, int c) +{ + for(; *s != (char) c; ++s) + if (*s == '\0') + return NULL; + return (char *) s; +} + +char * strstr(const char * s1,const char * s2) +{ + int l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *) s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1,s2,l2)) + return (char *) s1; + s1++; + } + return NULL; +} + diff --git a/mini-os/mm.c b/mini-os/mm.c new file mode 100644 index 0000000000..8ded35cc66 --- /dev/null +++ b/mini-os/mm.c @@ -0,0 +1,375 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: mm.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: memory management related functions + * contains buddy page allocator from Xen. + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + + +#include +#include +#include +#include +#include + +unsigned long *phys_to_machine_mapping; +extern char *stack; +extern char _text, _etext, _edata, _end; + +static void init_page_allocator(unsigned long min, unsigned long max); + +void init_mm(void) +{ + + unsigned long start_pfn, max_pfn, max_free_pfn; + + unsigned long *pgd = (unsigned long *)start_info.pt_base; + + printk("MM: Init\n"); + + printk(" _text: %p\n", &_text); + printk(" _etext: %p\n", &_etext); + printk(" _edata: %p\n", &_edata); + printk(" stack start: %p\n", &stack); + printk(" _end: %p\n", &_end); + + /* set up minimal memory infos */ + start_pfn = PFN_UP(__pa(&_end)); + max_pfn = start_info.nr_pages; + + printk(" start_pfn: %lx\n", start_pfn); + printk(" max_pfn: %lx\n", max_pfn); + + /* + * we know where free tables start (start_pfn) and how many we + * have (max_pfn). + * + * Currently the hypervisor stores page tables it providesin the + * high region of the this memory range. + * + * next we work out how far down this goes (max_free_pfn) + * + * XXX this assumes the hypervisor provided page tables to be in + * the upper region of our initial memory. I don't know if this + * is always true. + */ + + max_free_pfn = PFN_DOWN(__pa(pgd)); + { + unsigned long *pgd = (unsigned long *)start_info.pt_base; + unsigned long pte; + int i; + printk(" pgd(pa(pgd)): %lx(%lx)", (u_long)pgd, __pa(pgd)); + + for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ ) + { + unsigned long pgde = *pgd++; + if ( !(pgde & 1) ) continue; + pte = machine_to_phys(pgde & PAGE_MASK); + printk(" PT(%x): %lx(%lx)", i, (u_long)__va(pte), pte); + if (PFN_DOWN(pte) <= max_free_pfn) + max_free_pfn = PFN_DOWN(pte); + } + } + max_free_pfn--; + printk(" max_free_pfn: %lx\n", max_free_pfn); + + /* + * now we can initialise the page allocator + */ + printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n", + (u_long)__va(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), + (u_long)__va(PFN_PHYS(max_free_pfn)), PFN_PHYS(max_free_pfn)); + init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_free_pfn)); + + + /* Now initialise the physical->machine mapping table. */ + + + printk("MM: done\n"); + + +} + +/********************* + * ALLOCATION BITMAP + * One bit per page of memory. Bit set => page is allocated. + */ + +static unsigned long *alloc_bitmap; +#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8) + +#define allocated_in_map(_pn) \ +(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1)))) + + +/* + * Hint regarding bitwise arithmetic in map_{alloc,free}: + * -(1<= n. + * (1<next == NULL) + +#define round_pgdown(_p) ((_p)&PAGE_MASK) +#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) + + +/* + * Initialise allocator, placing addresses [@min,@max] in free pool. + * @min and @max are PHYSICAL addresses. + */ +static void init_page_allocator(unsigned long min, unsigned long max) +{ + int i; + unsigned long range, bitmap_size; + chunk_head_t *ch; + chunk_tail_t *ct; + + for ( i = 0; i < FREELIST_SIZE; i++ ) + { + free_head[i] = &free_tail[i]; + free_tail[i].pprev = &free_head[i]; + free_tail[i].next = NULL; + } + + min = round_pgup (min); + max = round_pgdown(max); + + /* Allocate space for the allocation bitmap. */ + bitmap_size = (max+1) >> (PAGE_SHIFT+3); + bitmap_size = round_pgup(bitmap_size); + alloc_bitmap = (unsigned long *)__va(min); + min += bitmap_size; + range = max - min; + + /* All allocated by default. */ + memset(alloc_bitmap, ~0, bitmap_size); + /* Free up the memory we've been given to play with. */ + map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT); + + /* The buddy lists are addressed in high memory. */ + min += PAGE_OFFSET; + max += PAGE_OFFSET; + + while ( range != 0 ) + { + /* + * Next chunk is limited by alignment of min, but also + * must not be bigger than remaining range. + */ + for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ ) + if ( min & (1<level = i; + ch->next = free_head[i]; + ch->pprev = &free_head[i]; + ch->next->pprev = &ch->next; + free_head[i] = ch; + ct->level = i; + } +} + + +/* Release a PHYSICAL address range to the allocator. */ +void release_bytes_to_allocator(unsigned long min, unsigned long max) +{ + min = round_pgup (min) + PAGE_OFFSET; + max = round_pgdown(max) + PAGE_OFFSET; + + while ( min < max ) + { + __free_pages(min, 0); + min += PAGE_SIZE; + } +} + + +/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */ +unsigned long __get_free_pages(int order) +{ + int i; + chunk_head_t *alloc_ch, *spare_ch; + chunk_tail_t *spare_ct; + + + /* Find smallest order which can satisfy the request. */ + for ( i = order; i < FREELIST_SIZE; i++ ) { + if ( !FREELIST_EMPTY(free_head[i]) ) + break; + } + + if ( i == FREELIST_SIZE ) goto no_memory; + + /* Unlink a chunk. */ + alloc_ch = free_head[i]; + free_head[i] = alloc_ch->next; + alloc_ch->next->pprev = alloc_ch->pprev; + + /* We may have to break the chunk a number of times. */ + while ( i != order ) + { + /* Split into two equal parts. */ + i--; + spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT))); + spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1; + + /* Create new header for spare chunk. */ + spare_ch->level = i; + spare_ch->next = free_head[i]; + spare_ch->pprev = &free_head[i]; + spare_ct->level = i; + + /* Link in the spare chunk. */ + spare_ch->next->pprev = &spare_ch->next; + free_head[i] = spare_ch; + } + + map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1<> PAGE_SHIFT; + + map_free(pagenr, 1<level != order ) break; + ch = (chunk_head_t *)(p - size); + p -= size; + } + else + { + /* Merge with successor block? */ + if ( allocated_in_map(pagenr+(1<level != order ) break; + } + + /* Okay, unlink the neighbour. */ + *ch->pprev = ch->next; + ch->next->pprev = ch->pprev; + + order++; + size <<= 1; + } + + /* Okay, add the final chunk to the appropriate free list. */ + ch = (chunk_head_t *)p; + ct = (chunk_tail_t *)(p+size)-1; + ct->level = order; + ch->level = order; + ch->pprev = &free_head[order]; + ch->next = free_head[order]; + ch->next->pprev = &ch->next; + free_head[order] = ch; +} diff --git a/mini-os/time.c b/mini-os/time.c new file mode 100644 index 0000000000..ff23b2ee63 --- /dev/null +++ b/mini-os/time.c @@ -0,0 +1,149 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: time.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Jul 2003 + * + * Environment: Xen Minimal OS + * Description: Simple time and timer functions + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + + +#include +#include +#include +#include +#include +#include + +/************************************************************************ + * Time functions + *************************************************************************/ + +static unsigned int rdtsc_bitshift; +static u32 st_scale_f; +static u32 st_scale_i; +static u32 shadow_st_pcc; +static s_time_t shadow_st; +static u32 shadow_wc_version=0; +static long shadow_tv_sec; +static long shadow_tv_usec; +static s_time_t shadow_wc_timestamp; + +/* + * System time. + * We need to read the values from the shared info page "atomically" + * and use the cycle counter value as the "version" number. Clashes + * should be very rare. + */ +inline s_time_t get_s_time(void) +{ + s32 delta_tsc; + u32 low; + u64 delta, tsc; + u32 version; + u64 cpu_freq, scale; + + /* check if our values are still up-to-date */ + while ( (version = HYPERVISOR_shared_info->wc_version) != + shadow_wc_version ) + { + barrier(); + + shadow_wc_version = version; + shadow_tv_sec = HYPERVISOR_shared_info->tv_sec; + shadow_tv_usec = HYPERVISOR_shared_info->tv_usec; + shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp; + shadow_st_pcc = HYPERVISOR_shared_info->st_timestamp; + shadow_st = HYPERVISOR_shared_info->system_time; + + rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift; + cpu_freq = HYPERVISOR_shared_info->cpu_freq; + + /* XXX cpu_freq as u32 limits it to 4.29 GHz. Get a better do_div! */ + scale = 1000000000LL << (32 + rdtsc_bitshift); + scale /= cpu_freq; + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + + barrier(); + } + + rdtscll(tsc); + low = (u32)(tsc >> rdtsc_bitshift); + delta_tsc = (s32)(low - shadow_st_pcc); + if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + + return shadow_st + delta; +} + + +/* + * Wallclock time. + * Based on what the hypervisor tells us, extrapolated using system time. + * Again need to read a number of values from the shared page "atomically". + * this time using a version number. + */ +void gettimeofday(struct timeval *tv) +{ + long usec, sec; + u64 now; + + now = get_s_time(); + usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; + sec = shadow_tv_sec; + usec += shadow_tv_usec; + + while ( usec >= 1000000 ) + { + usec -= 1000000; + sec++; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; +} + + +static void timer_handler(int ev, struct pt_regs *regs) +{ + static int i; + s_time_t now; + + i++; + if (i >= 1000) { + now = get_s_time(); + printf("T(%lld)\n", now); + i = 0; + } +} + + +void init_time(void) +{ + u64 __cpu_khz; + unsigned long cpu_khz; + + __cpu_khz = HYPERVISOR_shared_info->cpu_freq; + cpu_khz = (u32) (__cpu_khz/1000); + + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + add_ev_action(EV_TIMER, &timer_handler); + enable_ev_action(EV_TIMER); + enable_hypervisor_event(EV_TIMER); + +} diff --git a/mini-os/traps.c b/mini-os/traps.c new file mode 100644 index 0000000000..970efdd3a6 --- /dev/null +++ b/mini-os/traps.c @@ -0,0 +1,150 @@ + +#include +#include +#include + +/* + * These are assembler stubs in entry.S. + * They are the actual entry points for virtual exceptions. + */ +void divide_error(void); +void debug(void); +void int3(void); +void overflow(void); +void bounds(void); +void invalid_op(void); +void device_not_available(void); +void double_fault(void); +void coprocessor_segment_overrun(void); +void invalid_TSS(void); +void segment_not_present(void); +void stack_segment(void); +void general_protection(void); +void page_fault(void); +void coprocessor_error(void); +void simd_coprocessor_error(void); +void alignment_check(void); +void spurious_interrupt_bug(void); +void machine_check(void); + +/* + * C handlers here have their parameter-list constructed by the + * assembler stubs above. Each one gets a pointer to a list + * of register values (to be restored at end of exception). + * Some will also receive an error code -- this is the code that + * was generated by the processor for the underlying real exception. + * + * Note that the page-fault exception is special. It also receives + * the faulting linear address. Normally this would be found in + * register CR2, but that is not accessible in a virtualised OS. + */ + +static void inline do_trap(int trapnr, char *str, + struct pt_regs * regs, long error_code) +{ + printk("Trap\n"); +} + +#define DO_ERROR(trapnr, str, name) \ +void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + do_trap(trapnr, str, regs, error_code); \ +} + +#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \ +void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + do_trap(trapnr, str, regs, error_code); \ +} + +DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip) +DO_ERROR( 3, "int3", int3) +DO_ERROR( 4, "overflow", overflow) +DO_ERROR( 5, "bounds", bounds) +DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, "device not available", device_not_available) +DO_ERROR( 8, "double fault", double_fault) +DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, "invalid TSS", invalid_TSS) +DO_ERROR(11, "segment not present", segment_not_present) +DO_ERROR(12, "stack segment", stack_segment) +DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, "machine check", machine_check) + +void do_page_fault(struct pt_regs * regs, long error_code, + unsigned long address) +{ + printk("Page fault\n"); +} + +void do_general_protection(struct pt_regs * regs, long error_code) +{ + printk("GPF\n"); +} + + +void do_debug(struct pt_regs * regs, long error_code) +{ + printk("Debug exception\n"); +#define TF_MASK 0x100 + regs->eflags &= ~TF_MASK; +} + + + +void do_coprocessor_error(struct pt_regs * regs, long error_code) +{ + printk("Copro error\n"); +} + +void simd_math_error(void *eip) +{ + printk("SIMD error\n"); +} + +void do_simd_coprocessor_error(struct pt_regs * regs, + long error_code) +{ + printk("SIMD copro error\n"); +} + +void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ +} + +/* + * Submit a virtual IDT to teh hypervisor. This consists of tuples + * (interrupt vector, privilege ring, CS:EIP of handler). + * The 'privilege ring' field specifies the least-privileged ring that + * can trap to that vector using a software-interrupt instruction (INT). + */ +static trap_info_t trap_table[] = { + { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, __KERNEL_CS, (unsigned long)debug }, + { 3, 3, __KERNEL_CS, (unsigned long)int3 }, + { 4, 3, __KERNEL_CS, (unsigned long)overflow }, + { 5, 3, __KERNEL_CS, (unsigned long)bounds }, + { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, + { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, + { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, + { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, + { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { 0, 0, 0, 0 } +}; + + + +void trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); +} diff --git a/mini-os/vmlinux.lds b/mini-os/vmlinux.lds new file mode 100644 index 0000000000..7c4c4f8e9c --- /dev/null +++ b/mini-os/vmlinux.lds @@ -0,0 +1,82 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0xC0000000 + 0x000000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} -- cgit v1.2.3