Diffstat (limited to 'xenolinux-2.4.23-sparse/arch/xeno/mm')
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile       16
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c       325
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c  237
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/init.c        397
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c     226
5 files changed, 1201 insertions, 0 deletions
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile
new file mode 100644
index 0000000000..d0d16114b6
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the linux i386-specific parts of the memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := mm.o
+
+obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o
+
+export-objs := pageattr.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c
new file mode 100644
index 0000000000..f096f4cc1e
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c
@@ -0,0 +1,325 @@
+/*
+ * linux/arch/i386/mm/fault.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h> /* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+pgd_t *cur_pgd;
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out (timerlist_lock is acquired through the
+ * console unblank code)
+ */
+void bust_spinlocks(int yes)
+{
+ spin_lock_init(&timerlist_lock);
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+ int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+ unblank_screen();
+#endif
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give klogd
+ * a poke. Hold onto your hats...
+ */
+ console_loglevel = 15; /* NMI oopser may have shut the console up */
+ printk(" ");
+ console_loglevel = loglevel_save;
+ }
+}
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ * bit 0 == 0 means no page found, 1 means protection fault
+ * bit 1 == 0 means read, 1 means write
+ * bit 2 == 0 means kernel, 1 means user-mode
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs,
+ unsigned long error_code,
+ unsigned long address)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm;
+ struct vm_area_struct * vma;
+ unsigned long page;
+ unsigned long fixup;
+ int write;
+ siginfo_t info;
+
+ /* Set the "privileged fault" bit to something sane. */
+ error_code &= 3;
+ error_code |= (regs->xcs & 2) << 1;
+
+#if MMU_UPDATE_DEBUG > 0
+ if ( (error_code == 0) && (address >= TASK_SIZE) )
+ {
+ unsigned long paddr = __pa(address);
+ int i;
+ for ( i = 0; i < mmu_update_queue_idx; i++ )
+ {
+ if ( update_debug_queue[i].ptr == paddr )
+ {
+ printk("XXX now(EIP=%08lx:ptr=%08lx) "
+ "then(%s/%d:p/v=%08lx/%08lx)\n",
+ regs->eip, address,
+ update_debug_queue[i].file,
+ update_debug_queue[i].line,
+ update_debug_queue[i].ptr,
+ update_debug_queue[i].val);
+ }
+ }
+ }
+#endif
+
+ if ( flush_page_update_queue() != 0 ) return;
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * This verifies that the fault happens in kernel space
+ * (error_code & 4) == 0, and that the fault was not a
+ * protection error (error_code & 1) == 0.
+ */
+ if (address >= TASK_SIZE && !(error_code & 5))
+ goto vmalloc_fault;
+
+ mm = tsk->mm;
+ info.si_code = SEGV_MAPERR;
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (in_interrupt() || !mm)
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (error_code & 4) {
+ /*
+ * accessing the stack below %esp is always a bug.
+ * The "+ 32" is there due to some instructions (like
+ * pusha) doing post-decrement on the stack and that
+ * doesn't show up until later..
+ */
+ if (address + 32 < regs->esp)
+ goto bad_area;
+ }
+ if (expand_stack(vma, address))
+ goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ info.si_code = SEGV_ACCERR;
+ write = 0;
+ switch (error_code & 3) {
+ default: /* 3: write, present */
+ /* fall through */
+ case 2: /* write, not present */
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ write++;
+ break;
+ case 1: /* read, present */
+ goto bad_area;
+ case 0: /* read, not present */
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto bad_area;
+ }
+
+ survive:
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ switch (handle_mm_fault(mm, vma, address, write)) {
+ case 1:
+ tsk->min_flt++;
+ break;
+ case 2:
+ tsk->maj_flt++;
+ break;
+ case 0:
+ goto do_sigbus;
+ default:
+ goto out_of_memory;
+ }
+
+ up_read(&mm->mmap_sem);
+ return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ up_read(&mm->mmap_sem);
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & 4) {
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ /* info.si_code has been set above */
+ info.si_addr = (void *)address;
+ force_sig_info(SIGSEGV, &info, tsk);
+ return;
+ }
+
+no_context:
+ /* Are we prepared to handle this kernel fault? */
+ if ((fixup = search_exception_table(regs->eip)) != 0) {
+ regs->eip = fixup;
+ return;
+ }
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+ bust_spinlocks(1);
+
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+ else
+ printk(KERN_ALERT "Unable to handle kernel paging request");
+ printk(" at virtual address %08lx\n",address);
+ printk(" printing eip:\n");
+ printk("%08lx\n", regs->eip);
+ page = ((unsigned long *) cur_pgd)[address >> 22];
+ printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page));
+ if (page & 1) {
+ page &= PAGE_MASK;
+ address &= 0x003ff000;
+ page = machine_to_phys(page);
+ page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+ printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page,
+ machine_to_phys(page));
+ }
+ die("Oops", regs, error_code);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+ if (tsk->pid == 1) {
+ yield();
+ goto survive;
+ }
+ up_read(&mm->mmap_sem);
+ printk("VM: killing process %s\n", tsk->comm);
+ if (error_code & 4)
+ do_exit(SIGKILL);
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ /*
+ * Send a sigbus, regardless of whether we were in kernel
+ * or user mode.
+ */
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void *)address;
+ force_sig_info(SIGBUS, &info, tsk);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (!(error_code & 4))
+ goto no_context;
+ return;
+
+vmalloc_fault:
+ {
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "tsk" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ int offset = __pgd_offset(address);
+ pgd_t *pgd, *pgd_k;
+ pmd_t *pmd, *pmd_k;
+ pte_t *pte_k;
+
+ pgd = offset + cur_pgd;
+ pgd_k = init_mm.pgd + offset;
+
+ if (!pgd_present(*pgd_k))
+ goto no_context;
+ set_pgd(pgd, *pgd_k);
+
+ pmd = pmd_offset(pgd, address);
+ pmd_k = pmd_offset(pgd_k, address);
+ if (!pmd_present(*pmd_k))
+ goto no_context;
+ set_pmd(pmd, *pmd_k);
+ XENO_flush_page_update_queue(); /* flush PMD update */
+
+ pte_k = pte_offset(pmd_k, address);
+ if (!pte_present(*pte_k))
+ goto no_context;
+ return;
+ }
+}
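For illustration, here is a stand-alone sketch of the error_code decoding that do_page_fault() documents above (bit 0: protection fault vs. not-present, bit 1: write vs. read, bit 2: user vs. kernel). The helper name and the main() driver are ours, not part of the patch; it compiles as ordinary user-space C.

#include <stdio.h>

/* Decode the x86 page-fault error code the same way the handler above does. */
static void describe_fault(unsigned long error_code)
{
    printf("%s-mode %s access to a page that is %s\n",
           (error_code & 4) ? "user" : "kernel",
           (error_code & 2) ? "write" : "read",
           (error_code & 1) ? "present (protection fault)" : "not present");
}

int main(void)
{
    describe_fault(0x6);  /* user write, page not present: the common demand-fault case */
    describe_fault(0x1);  /* kernel read hitting a protection violation */
    return 0;
}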
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c
new file mode 100644
index 0000000000..b4784ccc02
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c
@@ -0,0 +1,237 @@
+/******************************************************************************
+ * xeno/mm/hypervisor.c
+ *
+ * Update page tables via the hypervisor.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/hypervisor.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/multicall.h>
+
+/*
+ * This suffices to protect us if we ever move to SMP domains.
+ * Further, it protects us against interrupts. At the very least, this is
+ * required for the network driver which flushes the update queue before
+ * pushing new receive buffers.
+ */
+static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;
+
+#define QUEUE_SIZE 2048
+static mmu_update_t update_queue[QUEUE_SIZE];
+unsigned int mmu_update_queue_idx = 0;
+#define idx mmu_update_queue_idx
+
+#if MMU_UPDATE_DEBUG > 0
+page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
+#undef queue_l1_entry_update
+#undef queue_l2_entry_update
+static void DEBUG_allow_pt_reads(void)
+{
+ pte_t *pte;
+ mmu_update_t update;
+ int i;
+ for ( i = idx-1; i >= 0; i-- )
+ {
+ pte = update_debug_queue[i].ptep;
+ if ( pte == NULL ) continue;
+ update_debug_queue[i].ptep = NULL;
+ update.ptr = pte;
+ update.val = update_debug_queue[i].pteval;
+ HYPERVISOR_mmu_update(&update, 1);
+ }
+}
+static void DEBUG_disallow_pt_read(unsigned long va)
+{
+ pte_t *pte;
+ pmd_t *pmd;
+ pgd_t *pgd;
+ unsigned long pteval;
+ /*
+ * We may fault because of an already outstanding update.
+ * That's okay -- it'll get fixed up in the fault handler.
+ */
+ mmu_update_t update;
+ pgd = pgd_offset_k(va);
+ pmd = pmd_offset(pgd, va);
+ pte = pte_offset(pmd, va);
+ update.ptr = pte;
+ pteval = *(unsigned long *)pte;
+ update.val = pteval & ~_PAGE_PRESENT;
+ HYPERVISOR_mmu_update(&update, 1);
+ update_debug_queue[idx].ptep = pte;
+ update_debug_queue[idx].pteval = pteval;
+}
+#endif
+
+#if MMU_UPDATE_DEBUG > 1
+#undef queue_pt_switch
+#undef queue_tlb_flush
+#undef queue_invlpg
+#undef queue_pgd_pin
+#undef queue_pgd_unpin
+#undef queue_pte_pin
+#undef queue_pte_unpin
+#endif
+
+
+/*
+ * MULTICALL_flush_page_update_queue:
+ * This is a version of the flush which queues as part of a multicall.
+ */
+void MULTICALL_flush_page_update_queue(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ if ( idx != 0 )
+ {
+#if MMU_UPDATE_DEBUG > 1
+ printk("Flushing %d entries from pt update queue\n", idx);
+#endif
+#if MMU_UPDATE_DEBUG > 0
+ DEBUG_allow_pt_reads();
+#endif
+ queue_multicall2(__HYPERVISOR_mmu_update, (unsigned long)update_queue, idx);
+ idx = 0;
+ }
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+static inline void __flush_page_update_queue(void)
+{
+#if MMU_UPDATE_DEBUG > 1
+ printk("Flushing %d entries from pt update queue\n", idx);
+#endif
+#if MMU_UPDATE_DEBUG > 0
+ DEBUG_allow_pt_reads();
+#endif
+ HYPERVISOR_mmu_update(update_queue, idx);
+ idx = 0;
+}
+
+void _flush_page_update_queue(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ if ( idx != 0 ) __flush_page_update_queue();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+static inline void increment_index(void)
+{
+ idx++;
+ if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue();
+}
+
+void queue_l1_entry_update(pte_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+#if MMU_UPDATE_DEBUG > 0
+ DEBUG_disallow_pt_read((unsigned long)ptr);
+#endif
+ update_queue[idx].ptr = (unsigned long)ptr;
+ update_queue[idx].val = val;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_l2_entry_update(pmd_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = (unsigned long)ptr;
+ update_queue[idx].val = val;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pt_switch(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_NEW_BASEPTR;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_tlb_flush(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_TLB_FLUSH;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_invlpg(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = ptr & PAGE_MASK;
+ update_queue[idx].val |= MMUEXT_INVLPG;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pgd_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L2_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pgd_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pte_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L1_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pte_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_set_ldt(unsigned long ptr, unsigned long len)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr;
+ update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
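The queue_*() functions above all follow one pattern: accumulate mmu_update_t requests in a fixed-size array and hand them to Xen in a single HYPERVISOR_mmu_update hypercall when the array fills or a flush is forced. Below is a stand-alone sketch of that batching pattern; the names are ours, fake_hypercall stands in for the real hypercall, and the real code additionally holds update_lock with interrupts disabled.

#include <stdio.h>

#define QUEUE_SIZE 4                      /* deliberately tiny so the flush is visible */

struct mmu_update { unsigned long ptr, val; };

static struct mmu_update queue[QUEUE_SIZE];
static unsigned int qidx;

/* Stand-in for HYPERVISOR_mmu_update(update_queue, idx). */
static void fake_hypercall(struct mmu_update *req, unsigned int count)
{
    printf("hypercall: %u update(s), first ptr=%#lx\n", count, req[0].ptr);
}

static void flush_queue(void)
{
    if (qidx != 0) {
        fake_hypercall(queue, qidx);
        qidx = 0;
    }
}

static void queue_update(unsigned long ptr, unsigned long val)
{
    queue[qidx].ptr = ptr;
    queue[qidx].val = val;
    if (++qidx == QUEUE_SIZE)             /* queue full: flush eagerly */
        flush_queue();
}

int main(void)
{
    for (unsigned long i = 0; i < 6; i++)
        queue_update(0x1000 * i, i);
    flush_queue();                        /* push out whatever remains */
    return 0;
}

Batching matters because each hypercall is a relatively expensive trap into Xen; amortising that cost across up to QUEUE_SIZE page-table updates is the whole point of this file.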
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/init.c
new file mode 100644
index 0000000000..883cd03b37
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/init.c
@@ -0,0 +1,397 @@
+/*
+ * linux/arch/i386/mm/init.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+
+mmu_gather_t mmu_gathers[NR_CPUS];
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages;
+static unsigned long totalhigh_pages;
+
+int do_check_pgt_cache(int low, int high)
+{
+ int freed = 0;
+ if(pgtable_cache_size > high) {
+ do {
+ if (!QUICKLIST_EMPTY(pgd_quicklist)) {
+ free_pgd_slow(get_pgd_fast());
+ freed++;
+ }
+ if (!QUICKLIST_EMPTY(pte_quicklist)) {
+ pte_free_slow(pte_alloc_one_fast(NULL, 0));
+ freed++;
+ }
+ } while(pgtable_cache_size > low);
+ }
+ return freed;
+}
+
+void show_mem(void)
+{
+ int i, total = 0, reserved = 0;
+ int shared = 0, cached = 0;
+ int highmem = 0;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+ i = max_mapnr;
+ while (i-- > 0) {
+ total++;
+ if (PageHighMem(mem_map+i))
+ highmem++;
+ if (PageReserved(mem_map+i))
+ reserved++;
+ else if (PageSwapCache(mem_map+i))
+ cached++;
+ else if (page_count(mem_map+i))
+ shared += page_count(mem_map+i) - 1;
+ }
+ printk("%d pages of RAM\n", total);
+ printk("%d pages of HIGHMEM\n",highmem);
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+ printk("%ld pages in page table cache\n",pgtable_cache_size);
+ show_buffers();
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+static inline void set_pte_phys (unsigned long vaddr,
+ unsigned long phys, pgprot_t prot)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = init_mm.pgd + __pgd_offset(vaddr);
+ if (pgd_none(*pgd)) {
+ printk("PAE BUG #00!\n");
+ return;
+ }
+ pmd = pmd_offset(pgd, vaddr);
+ if (pmd_none(*pmd)) {
+ printk("PAE BUG #01!\n");
+ return;
+ }
+ pte = pte_offset(pmd, vaddr);
+
+#if 0 /* Not in Xen, since this breaks clear_fixmap. */
+ if (pte_val(*pte))
+ pte_ERROR(*pte);
+#endif
+
+ /* We queue directly, avoiding hidden phys->machine translation. */
+ queue_l1_entry_update(pte, phys | pgprot_val(prot));
+
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys,
+ pgprot_t flags)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ printk("Invalid __set_fixmap\n");
+ return;
+ }
+ set_pte_phys(address, phys,
+ __pgprot(pgprot_val(PAGE_KERNEL)|pgprot_val(flags)));
+}
+
+void clear_fixmap(enum fixed_addresses idx)
+{
+ set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0));
+}
+
+static void __init fixrange_init (unsigned long start,
+ unsigned long end, pgd_t *pgd_base)
+{
+ pgd_t *pgd, *kpgd;
+ pmd_t *pmd, *kpmd;
+ pte_t *pte, *kpte;
+ int i, j;
+ unsigned long vaddr;
+
+ vaddr = start;
+ i = __pgd_offset(vaddr);
+ j = __pmd_offset(vaddr);
+ pgd = pgd_base + i;
+
+ for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+#if CONFIG_X86_PAE
+ if (pgd_none(*pgd)) {
+ pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
+ if (pmd != pmd_offset(pgd, 0))
+ printk("PAE BUG #02!\n");
+ }
+ pmd = pmd_offset(pgd, vaddr);
+#else
+ pmd = (pmd_t *)pgd;
+#endif
+ for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+ if (pmd_none(*pmd)) {
+ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ clear_page(pte);
+ kpgd = pgd_offset_k((unsigned long)pte);
+ kpmd = pmd_offset(kpgd, (unsigned long)pte);
+ kpte = pte_offset(kpmd, (unsigned long)pte);
+ queue_l1_entry_update(kpte,
+ (*(unsigned long *)kpte)&~_PAGE_RW);
+
+ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
+ }
+ vaddr += PMD_SIZE;
+ }
+ j = 0;
+ }
+
+ XENO_flush_page_update_queue();
+}
+
+
+static void __init zone_sizes_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned int max_dma, high, low;
+
+ max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ low = max_low_pfn;
+ high = highend_pfn;
+
+ if (low < max_dma)
+ zones_size[ZONE_DMA] = low;
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+ }
+ free_area_init(zones_size);
+}
+
+/*
+ * paging_init() sets up the page tables - note that the first 8MB are
+ * already mapped by head.S.
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+ unsigned long vaddr;
+
+ zone_sizes_init();
+
+ /*
+ * Fixed mappings, only the page table structure has to be created -
+ * mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
+
+ /* Switch to the real shared_info page, and clear the dummy page. */
+ set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+#ifdef CONFIG_HIGHMEM
+#error
+ kmap_init();
+#endif
+}
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+ return 1;
+}
+
+#ifdef CONFIG_HIGHMEM
+void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+{
+ if (!page_is_ram(pfn)) {
+ SetPageReserved(page);
+ return;
+ }
+
+ if (bad_ppro && page_kills_ppro(pfn)) {
+ SetPageReserved(page);
+ return;
+ }
+
+ ClearPageReserved(page);
+ set_bit(PG_highmem, &page->flags);
+ atomic_set(&page->count, 1);
+ __free_page(page);
+ totalhigh_pages++;
+}
+#endif /* CONFIG_HIGHMEM */
+
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ highmem_start_page = mem_map + highstart_pfn;
+ max_mapnr = num_physpages = highend_pfn;
+ num_mappedpages = max_low_pfn;
+#else
+ max_mapnr = num_mappedpages = num_physpages = max_low_pfn;
+#endif
+}
+
+static int __init free_pages_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+#error Where is this supposed to be initialised?
+ int bad_ppro;
+#endif
+ int reservedpages, pfn;
+
+ /* this will put all low memory onto the freelists */
+ totalram_pages += free_all_bootmem();
+
+ reservedpages = 0;
+ for (pfn = 0; pfn < max_low_pfn; pfn++) {
+ /*
+ * Only count reserved RAM pages
+ */
+ if (page_is_ram(pfn) && PageReserved(mem_map+pfn))
+ reservedpages++;
+ }
+#ifdef CONFIG_HIGHMEM
+ for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+ one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
+ totalram_pages += totalhigh_pages;
+#endif
+ return reservedpages;
+}
+
+void __init mem_init(void)
+{
+ int codesize, reservedpages, datasize, initsize;
+
+ if (!mem_map)
+ BUG();
+
+ set_max_mapnr_init();
+
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+ /* clear the zero-page */
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+ reservedpages = free_pages_init();
+
+ codesize = (unsigned long) &_etext - (unsigned long) &_text;
+ datasize = (unsigned long) &_edata - (unsigned long) &_etext;
+ initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+ printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ max_mapnr << (PAGE_SHIFT-10),
+ codesize >> 10,
+ reservedpages << (PAGE_SHIFT-10),
+ datasize >> 10,
+ initsize >> 10,
+ (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+ );
+
+ boot_cpu_data.wp_works_ok = 1;
+}
+
+void free_initmem(void)
+{
+ unsigned long addr;
+
+ addr = (unsigned long)(&__init_begin);
+ for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ set_page_count(virt_to_page(addr), 1);
+ free_page(addr);
+ totalram_pages++;
+ }
+ printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ if (start < end)
+ printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ set_page_count(virt_to_page(start), 1);
+ free_page(start);
+ totalram_pages++;
+ }
+}
+#endif
+
+void si_meminfo(struct sysinfo *val)
+{
+ val->totalram = totalram_pages;
+ val->sharedram = 0;
+ val->freeram = nr_free_pages();
+ val->bufferram = atomic_read(&buffermem_pages);
+ val->totalhigh = totalhigh_pages;
+ val->freehigh = nr_free_highpages();
+ val->mem_unit = PAGE_SIZE;
+ return;
+}
+
+#if defined(CONFIG_X86_PAE)
+struct kmem_cache_s *pae_pgd_cachep;
+void __init pgtable_cache_init(void)
+{
+ /*
+ * PAE pgds must be 16-byte aligned:
+ */
+ pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
+ SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
+ if (!pae_pgd_cachep)
+ panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
+}
+#endif /* CONFIG_X86_PAE */
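zone_sizes_init() above carves the physical page range into ZONE_DMA (below the 16MB ISA DMA limit), ZONE_NORMAL (the rest of lowmem) and, with CONFIG_HIGHMEM, ZONE_HIGHMEM. A stand-alone sketch of that arithmetic with made-up pfn values (the numbers and the main() driver are ours, not from the patch):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    unsigned long max_dma = (16UL << 20) >> PAGE_SHIFT;   /* 16MB ISA DMA limit, in pages */
    unsigned long low     = 224UL << (20 - PAGE_SHIFT);   /* pretend max_low_pfn  = 224MB */
    unsigned long high    = 512UL << (20 - PAGE_SHIFT);   /* pretend highend_pfn  = 512MB */
    unsigned long zones_size[3] = { 0, 0, 0 };            /* DMA, NORMAL, HIGHMEM */

    if (low < max_dma) {
        zones_size[0] = low;                              /* everything fits in ZONE_DMA  */
    } else {
        zones_size[0] = max_dma;
        zones_size[1] = low - max_dma;
        zones_size[2] = high - low;                       /* only populated with HIGHMEM  */
    }
    printf("DMA=%lu NORMAL=%lu HIGHMEM=%lu pages\n",
           zones_size[0], zones_size[1], zones_size[2]);
    return 0;
}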
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c
new file mode 100644
index 0000000000..eac5c6a63c
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c
@@ -0,0 +1,226 @@
+/*
+ * arch/xeno/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ *
+ * Modifications for Xenolinux (c) 2003 Keir Fraser
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/mmu.h>
+
+#if defined(CONFIG_XENO_PRIV)
+
+#define direct_set_pte(_p, _v) queue_unchecked_mmu_update((_p), (_v).pte_low)
+#define __direct_pte(x) ((pte_t) { (x) } )
+#define __direct_mk_pte(page_nr,pgprot) \
+ __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+#define direct_mk_pte_phys(physpage, pgprot) \
+ __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+
+static inline void direct_remap_area_pte(pte_t *pte,
+ unsigned long address,
+ unsigned long size,
+ unsigned long machine_addr,
+ pgprot_t prot)
+{
+ unsigned long end;
+
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ if (address >= end)
+ BUG();
+ do {
+ if (!pte_none(*pte)) {
+ printk("direct_remap_area_pte: page already exists\n");
+ BUG();
+ }
+ direct_set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot)));
+ address += PAGE_SIZE;
+ machine_addr += PAGE_SIZE;
+ pte++;
+ } while (address && (address < end));
+}
+
+static inline int direct_remap_area_pmd(struct mm_struct *mm,
+ pmd_t *pmd,
+ unsigned long address,
+ unsigned long size,
+ unsigned long machine_addr,
+ pgprot_t prot)
+{
+ unsigned long end;
+
+ address &= ~PGDIR_MASK;
+ end = address + size;
+ if (end > PGDIR_SIZE)
+ end = PGDIR_SIZE;
+ machine_addr -= address;
+ if (address >= end)
+ BUG();
+ do {
+ pte_t * pte = pte_alloc(mm, pmd, address);
+ if (!pte)
+ return -ENOMEM;
+ direct_remap_area_pte(pte, address, end - address,
+ address + machine_addr, prot);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+ return 0;
+}
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot)
+{
+ int error = 0;
+ pgd_t * dir;
+ unsigned long end = address + size;
+
+ machine_addr -= address;
+ dir = pgd_offset(mm, address);
+ flush_cache_all();
+ if (address >= end)
+ BUG();
+ spin_lock(&mm->page_table_lock);
+ do {
+ pmd_t *pmd = pmd_alloc(mm, dir, address);
+ error = -ENOMEM;
+ if (!pmd)
+ break;
+ error = direct_remap_area_pmd(mm, pmd, address, end - address,
+ machine_addr + address, prot);
+ if (error)
+ break;
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ dir++;
+ } while (address && (address < end));
+ spin_unlock(&mm->page_table_lock);
+ flush_tlb_all();
+ return error;
+}
+
+#endif /* CONFIG_XENO_PRIV */
+
+
+/*
+ * Remap an arbitrary machine address space into the kernel virtual
+ * address space. Needed when a privileged instance of Xenolinux wants
+ * to access space outside its world directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void * __ioremap(unsigned long machine_addr,
+ unsigned long size,
+ unsigned long flags)
+{
+#if defined(CONFIG_XENO_PRIV)
+ void * addr;
+ struct vm_struct * area;
+ unsigned long offset, last_addr;
+ pgprot_t prot;
+
+ /* Only privileged Xenolinux can make unchecked pagetable updates. */
+ if ( !(start_info.flags & SIF_PRIVILEGED) )
+ return NULL;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = machine_addr + size - 1;
+ if (!size || last_addr < machine_addr)
+ return NULL;
+
+ /* Mappings have to be page-aligned */
+ offset = machine_addr & ~PAGE_MASK;
+ machine_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - machine_addr;
+
+ /* Ok, go for it */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ addr = area->addr;
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY |
+ _PAGE_ACCESSED | flags);
+ if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr),
+ machine_addr, size, prot)) {
+ vfree(addr);
+ return NULL;
+ }
+ return (void *) (offset + (char *)addr);
+#else
+ return NULL;
+#endif
+}
+
+void iounmap(void *addr)
+{
+ vfree((void *)((unsigned long)addr & PAGE_MASK));
+}
+
+/* Implementation of boot-time ioremap, for the purpose of providing access
+to the VGA console for privileged domains. Unlike boot-time ioremap on
+other architectures, ours is permanent and is not reclaimed when the vmalloc
+infrastructure is started. */
+
+void __init *bt_ioremap(unsigned long machine_addr, unsigned long size)
+{
+ unsigned long offset, last_addr;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = machine_addr + size - 1;
+ if (!size || last_addr < machine_addr)
+ return NULL;
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = machine_addr & ~PAGE_MASK;
+ machine_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - machine_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (nrpages > NR_FIX_BTMAPS)
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx = FIX_BTMAP_BEGIN;
+ while (nrpages > 0) {
+ set_fixmap(idx, machine_addr);
+ machine_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+
+ flush_tlb_all();
+
+ return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+
+#if 0 /* We don't support these functions. They shouldn't be required. */
+void __init bt_iounmap(void *addr, unsigned long size) {}
+#endif
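__ioremap() above accepts a machine address that need not be page-aligned: it records the offset within the first page, maps whole pages, and returns a pointer displaced by that offset. A stand-alone sketch of just that alignment arithmetic (the example address and size are ours):

#include <stdio.h>

#define PAGE_SIZE     4096UL
#define PAGE_MASK     (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
    unsigned long machine_addr = 0xfeedc123UL;            /* arbitrary, unaligned        */
    unsigned long size         = 0x300UL;
    unsigned long last_addr    = machine_addr + size - 1;

    unsigned long offset = machine_addr & ~PAGE_MASK;     /* byte offset into first page */
    machine_addr &= PAGE_MASK;                            /* map from the page boundary  */
    size = PAGE_ALIGN(last_addr + 1) - machine_addr;      /* round up to whole pages     */

    printf("map %#lx for %#lx bytes; caller gets mapping base + %#lx\n",
           machine_addr, size, offset);
    return 0;
}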