Diffstat (limited to 'xenolinux-2.4.23-sparse/arch/xeno/mm')
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile       16
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c       325
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c  237
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/init.c        397
-rw-r--r--  xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c     226
5 files changed, 1201 insertions, 0 deletions
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile
new file mode 100644
index 0000000000..d0d16114b6
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the linux i386-specific parts of the memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := mm.o
+
+obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o
+
+export-objs := pageattr.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c
new file mode 100644
index 0000000000..f096f4cc1e
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/fault.c
@@ -0,0 +1,325 @@
+/*
+ * linux/arch/i386/mm/fault.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h> /* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+pgd_t *cur_pgd;
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out (timerlist_lock is acquired through the
+ * console unblank code)
+ */
+void bust_spinlocks(int yes)
+{
+ spin_lock_init(&timerlist_lock);
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+ int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+ unblank_screen();
+#endif
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give klogd
+ * a poke. Hold onto your hats...
+ */
+ console_loglevel = 15; /* NMI oopser may have shut the console up */
+ printk(" ");
+ console_loglevel = loglevel_save;
+ }
+}
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ * bit 0 == 0 means no page found, 1 means protection fault
+ * bit 1 == 0 means read, 1 means write
+ * bit 2 == 0 means kernel, 1 means user-mode
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs,
+ unsigned long error_code,
+ unsigned long address)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm;
+ struct vm_area_struct * vma;
+ unsigned long page;
+ unsigned long fixup;
+ int write;
+ siginfo_t info;
+
+ /* Set the "privileged fault" bit to something sane. */
+ error_code &= 3;
+ error_code |= (regs->xcs & 2) << 1;
+
+#if MMU_UPDATE_DEBUG > 0
+ if ( (error_code == 0) && (address >= TASK_SIZE) )
+ {
+ unsigned long paddr = __pa(address);
+ int i;
+ for ( i = 0; i < mmu_update_queue_idx; i++ )
+ {
+ if ( update_debug_queue[i].ptr == paddr )
+ {
+ printk("XXX now(EIP=%08lx:ptr=%08lx) "
+ "then(%s/%d:p/v=%08lx/%08lx)\n",
+ regs->eip, address,
+ update_debug_queue[i].file,
+ update_debug_queue[i].line,
+ update_debug_queue[i].ptr,
+ update_debug_queue[i].val);
+ }
+ }
+ }
+#endif
+
+ if ( flush_page_update_queue() != 0 ) return;
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * This verifies that the fault happens in kernel space
+ * (error_code & 4) == 0, and that the fault was not a
+ * protection error (error_code & 1) == 0.
+ */
+ if (address >= TASK_SIZE && !(error_code & 5))
+ goto vmalloc_fault;
+
+ mm = tsk->mm;
+ info.si_code = SEGV_MAPERR;
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (in_interrupt() || !mm)
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (error_code & 4) {
+ /*
+ * accessing the stack below %esp is always a bug.
+ * The "+ 32" is there due to some instructions (like
+ * pusha) doing post-decrement on the stack and that
+ * doesn't show up until later..
+ */
+ if (address + 32 < regs->esp)
+ goto bad_area;
+ }
+ if (expand_stack(vma, address))
+ goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ info.si_code = SEGV_ACCERR;
+ write = 0;
+ switch (error_code & 3) {
+ default: /* 3: write, present */
+ /* fall through */
+ case 2: /* write, not present */
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ write++;
+ break;
+ case 1: /* read, present */
+ goto bad_area;
+ case 0: /* read, not present */
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto bad_area;
+ }
+
+ survive:
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ switch (handle_mm_fault(mm, vma, address, write)) {
+ case 1:
+ tsk->min_flt++;
+ break;
+ case 2:
+ tsk->maj_flt++;
+ break;
+ case 0:
+ goto do_sigbus;
+ default:
+ goto out_of_memory;
+ }
+
+ up_read(&mm->mmap_sem);
+ return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ up_read(&mm->mmap_sem);
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & 4) {
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ /* info.si_code has been set above */
+ info.si_addr = (void *)address;
+ force_sig_info(SIGSEGV, &info, tsk);
+ return;
+ }
+
+no_context:
+ /* Are we prepared to handle this kernel fault? */
+ if ((fixup = search_exception_table(regs->eip)) != 0) {
+ regs->eip = fixup;
+ return;
+ }
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+ bust_spinlocks(1);
+
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+ else
+ printk(KERN_ALERT "Unable to handle kernel paging request");
+ printk(" at virtual address %08lx\n",address);
+ printk(" printing eip:\n");
+ printk("%08lx\n", regs->eip);
+ page = ((unsigned long *) cur_pgd)[address >> 22];
+ printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page));
+ if (page & 1) {
+ page &= PAGE_MASK;
+ address &= 0x003ff000;
+ page = machine_to_phys(page);
+ page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+ printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page,
+ machine_to_phys(page));
+ }
+ die("Oops", regs, error_code);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+ if (tsk->pid == 1) {
+ yield();
+ goto survive;
+ }
+ up_read(&mm->mmap_sem);
+ printk("VM: killing process %s\n", tsk->comm);
+ if (error_code & 4)
+ do_exit(SIGKILL);
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ /*
+ * Send a sigbus, regardless of whether we were in kernel
+ * or user mode.
+ */
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void *)address;
+ force_sig_info(SIGBUS, &info, tsk);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (!(error_code & 4))
+ goto no_context;
+ return;
+
+vmalloc_fault:
+ {
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "tsk" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ int offset = __pgd_offset(address);
+ pgd_t *pgd, *pgd_k;
+ pmd_t *pmd, *pmd_k;
+ pte_t *pte_k;
+
+ pgd = offset + cur_pgd;
+ pgd_k = init_mm.pgd + offset;
+
+ if (!pgd_present(*pgd_k))
+ goto no_context;
+ set_pgd(pgd, *pgd_k);
+
+ pmd = pmd_offset(pgd, address);
+ pmd_k = pmd_offset(pgd_k, address);
+ if (!pmd_present(*pmd_k))
+ goto no_context;
+ set_pmd(pmd, *pmd_k);
+ XENO_flush_page_update_queue(); /* flush PMD update */
+
+ pte_k = pte_offset(pmd_k, address);
+ if (!pte_present(*pte_k))
+ goto no_context;
+ return;
+ }
+}
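For illustration, here is a stand-alone sketch of the error_code decoding that do_page_fault() documents above (bit 0: protection fault vs. not-present, bit 1: write vs. read, bit 2: user vs. kernel). The helper name and the main() driver are ours, not part of the patch; it compiles as ordinary user-space C.

#include <stdio.h>

/* Decode the x86 page-fault error code the same way the handler above does. */
static void describe_fault(unsigned long error_code)
{
    printf("%s-mode %s access to a page that is %s\n",
           (error_code & 4) ? "user" : "kernel",
           (error_code & 2) ? "write" : "read",
           (error_code & 1) ? "present (protection fault)" : "not present");
}

int main(void)
{
    describe_fault(0x6);  /* user write, page not present: the common demand-fault case */
    describe_fault(0x1);  /* kernel read hitting a protection violation */
    return 0;
}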
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c
new file mode 100644
index 0000000000..b4784ccc02
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/hypervisor.c
@@ -0,0 +1,237 @@
+/******************************************************************************
+ * xeno/mm/hypervisor.c
+ *
+ * Update page tables via the hypervisor.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/hypervisor.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/multicall.h>
+
+/*
+ * This suffices to protect us if we ever move to SMP domains.
+ * Further, it protects us against interrupts. At the very least, this is
+ * required for the network driver which flushes the update queue before
+ * pushing new receive buffers.
+ */
+static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;
+
+#define QUEUE_SIZE 2048
+static mmu_update_t update_queue[QUEUE_SIZE];
+unsigned int mmu_update_queue_idx = 0;
+#define idx mmu_update_queue_idx
+
+#if MMU_UPDATE_DEBUG > 0
+page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
+#undef queue_l1_entry_update
+#undef queue_l2_entry_update
+static void DEBUG_allow_pt_reads(void)
+{
+ pte_t *pte;
+ mmu_update_t update;
+ int i;
+ for ( i = idx-1; i >= 0; i-- )
+ {
+ pte = update_debug_queue[i].ptep;
+ if ( pte == NULL ) continue;
+ update_debug_queue[i].ptep = NULL;
+ update.ptr = pte;
+ update.val = update_debug_queue[i].pteval;
+ HYPERVISOR_mmu_update(&update, 1);
+ }
+}
+static void DEBUG_disallow_pt_read(unsigned long va)
+{
+ pte_t *pte;
+ pmd_t *pmd;
+ pgd_t *pgd;
+ unsigned long pteval;
+ /*
+ * We may fault because of an already outstanding update.
+ * That's okay -- it'll get fixed up in the fault handler.
+ */
+ mmu_update_t update;
+ pgd = pgd_offset_k(va);
+ pmd = pmd_offset(pgd, va);
+ pte = pte_offset(pmd, va);
+ update.ptr = pte;
+ pteval = *(unsigned long *)pte;
+ update.val = pteval & ~_PAGE_PRESENT;
+ HYPERVISOR_mmu_update(&update, 1);
+ update_debug_queue[idx].ptep = pte;
+ update_debug_queue[idx].pteval = pteval;
+}
+#endif
+
+#if MMU_UPDATE_DEBUG > 1
+#undef queue_pt_switch
+#undef queue_tlb_flush
+#undef queue_invlpg
+#undef queue_pgd_pin
+#undef queue_pgd_unpin
+#undef queue_pte_pin
+#undef queue_pte_unpin
+#endif
+
+
+/*
+ * MULTICALL_flush_page_update_queue:
+ * This is a version of the flush which queues as part of a multicall.
+ */
+void MULTICALL_flush_page_update_queue(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ if ( idx != 0 )
+ {
+#if MMU_UPDATE_DEBUG > 1
+ printk("Flushing %d entries from pt update queue\n", idx);
+#endif
+#if MMU_UPDATE_DEBUG > 0
+ DEBUG_allow_pt_reads();
+#endif
+ queue_multicall2(__HYPERVISOR_mmu_update, (unsigned long)update_queue, idx);
+ idx = 0;
+ }
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+static inline void __flush_page_update_queue(void)
+{
+#if MMU_UPDATE_DEBUG > 1
+ printk("Flushing %d entries from pt update queue\n", idx);
+#endif
+#if MMU_UPDATE_DEBUG > 0
+ DEBUG_allow_pt_reads();
+#endif
+ HYPERVISOR_mmu_update(update_queue, idx);
+ idx = 0;
+}
+
+void _flush_page_update_queue(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ if ( idx != 0 ) __flush_page_update_queue();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+static inline void increment_index(void)
+{
+ idx++;
+ if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue();
+}
+
+void queue_l1_entry_update(pte_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+#if MMU_UPDATE_DEBUG > 0
+ DEBUG_disallow_pt_read((unsigned long)ptr);
+#endif
+ update_queue[idx].ptr = (unsigned long)ptr;
+ update_queue[idx].val = val;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_l2_entry_update(pmd_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = (unsigned long)ptr;
+ update_queue[idx].val = val;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pt_switch(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_NEW_BASEPTR;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_tlb_flush(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_TLB_FLUSH;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_invlpg(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = ptr & PAGE_MASK;
+ update_queue[idx].val |= MMUEXT_INVLPG;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pgd_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L2_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pgd_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pte_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L1_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pte_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_set_ldt(unsigned long ptr, unsigned long len)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr;
+ update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
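The queue_*() functions above all follow one pattern: accumulate mmu_update_t requests in a fixed-size array and hand them to Xen in a single HYPERVISOR_mmu_update hypercall when the array fills or a flush is forced. Below is a stand-alone sketch of that batching pattern; the names are ours, fake_hypercall stands in for the real hypercall, and the real code additionally holds update_lock with interrupts disabled.

#include <stdio.h>

#define QUEUE_SIZE 4                      /* deliberately tiny so the flush is visible */

struct mmu_update { unsigned long ptr, val; };

static struct mmu_update queue[QUEUE_SIZE];
static unsigned int qidx;

/* Stand-in for HYPERVISOR_mmu_update(update_queue, idx). */
static void fake_hypercall(struct mmu_update *req, unsigned int count)
{
    printf("hypercall: %u update(s), first ptr=%#lx\n", count, req[0].ptr);
}

static void flush_queue(void)
{
    if (qidx != 0) {
        fake_hypercall(queue, qidx);
        qidx = 0;
    }
}

static void queue_update(unsigned long ptr, unsigned long val)
{
    queue[qidx].ptr = ptr;
    queue[qidx].val = val;
    if (++qidx == QUEUE_SIZE)             /* queue full: flush eagerly */
        flush_queue();
}

int main(void)
{
    for (unsigned long i = 0; i < 6; i++)
        queue_update(0x1000 * i, i);
    flush_queue();                        /* push out whatever remains */
    return 0;
}

Batching matters because each hypercall is a relatively expensive trap into Xen; amortising that cost across up to QUEUE_SIZE page-table updates is the whole point of this file.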
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/init.c
new file mode 100644
index 0000000000..883cd03b37
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/init.c
@@ -0,0 +1,397 @@
+/*
+ * linux/arch/i386/mm/init.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+
+mmu_gather_t mmu_gathers[NR_CPUS];
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages;
+static unsigned long totalhigh_pages;
+
+int do_check_pgt_cache(int low, int high)
+{
+ int freed = 0;
+ if(pgtable_cache_size > high) {
+ do {
+ if (!QUICKLIST_EMPTY(pgd_quicklist)) {
+ free_pgd_slow(get_pgd_fast());
+ freed++;
+ }
+ if (!QUICKLIST_EMPTY(pte_quicklist)) {
+ pte_free_slow(pte_alloc_one_fast(NULL, 0));
+ freed++;
+ }
+ } while(pgtable_cache_size > low);
+ }
+ return freed;
+}
+
+void show_mem(void)
+{
+ int i, total = 0, reserved = 0;
+ int shared = 0, cached = 0;
+ int highmem = 0;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+ i = max_mapnr;
+ while (i-- > 0) {
+ total++;
+ if (PageHighMem(mem_map+i))
+ highmem++;
+ if (PageReserved(mem_map+i))
+ reserved++;
+ else if (PageSwapCache(mem_map+i))
+ cached++;
+ else if (page_count(mem_map+i))
+ shared += page_count(mem_map+i) - 1;
+ }
+ printk("%d pages of RAM\n", total);
+ printk("%d pages of HIGHMEM\n",highmem);
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+ printk("%ld pages in page table cache\n",pgtable_cache_size);
+ show_buffers();
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+static inline void set_pte_phys (unsigned long vaddr,
+ unsigned long phys, pgprot_t prot)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = init_mm.pgd + __pgd_offset(vaddr);
+ if (pgd_none(*pgd)) {
+ printk("PAE BUG #00!\n");
+ return;
+ }
+ pmd = pmd_offset(pgd, vaddr);
+ if (pmd_none(*pmd)) {
+ printk("PAE BUG #01!\n");
+ return;
+ }
+ pte = pte_offset(pmd, vaddr);
+
+#if 0 /* Not in Xen, since this breaks clear_fixmap. */
+ if (pte_val(*pte))
+ pte_ERROR(*pte);
+#endif
+
+ /* We queue directly, avoiding hidden phys->machine translation. */
+ queue_l1_entry_update(pte, phys | pgprot_val(prot));
+
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys,
+ pgprot_t flags)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ printk("Invalid __set_fixmap\n");
+ return;
+ }
+ set_pte_phys(address, phys,
+ __pgprot(pgprot_val(PAGE_KERNEL)|pgprot_val(flags)));
+}
+
+void clear_fixmap(enum fixed_addresses idx)
+{
+ set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0));
+}
+
+static void __init fixrange_init (unsigned long start,
+ unsigned long end, pgd_t *pgd_base)
+{
+ pgd_t *pgd, *kpgd;
+ pmd_t *pmd, *kpmd;
+ pte_t *pte, *kpte;
+ int i, j;
+ unsigned long vaddr;
+
+ vaddr = start;
+ i = __pgd_offset(vaddr);
+ j = __pmd_offset(vaddr);
+ pgd = pgd_base + i;
+
+ for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+#if CONFIG_X86_PAE
+ if (pgd_none(*pgd)) {
+ pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
+ if (pmd != pmd_offset(pgd, 0))
+ printk("PAE BUG #02!\n");
+ }
+ pmd = pmd_offset(pgd, vaddr);
+#else
+ pmd = (pmd_t *)pgd;
+#endif
+ for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+ if (pmd_none(*pmd)) {
+ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ clear_page(pte);
+ kpgd = pgd_offset_k((unsigned long)pte);
+ kpmd = pmd_offset(kpgd, (unsigned long)pte);
+ kpte = pte_offset(kpmd, (unsigned long)pte);
+ queue_l1_entry_update(kpte,
+ (*(unsigned long *)kpte)&~_PAGE_RW);
+
+ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
+ }
+ vaddr += PMD_SIZE;
+ }
+ j = 0;
+ }
+
+ XENO_flush_page_update_queue();
+}
+
+
+static void __init zone_sizes_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned int max_dma, high, low;
+
+ max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ low = max_low_pfn;
+ high = highend_pfn;
+
+ if (low < max_dma)
+ zones_size[ZONE_DMA] = low;
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+ }
+ free_area_init(zones_size);
+}
+
+/*
+ * paging_init() sets up the page tables - note that the first 8MB are
+ * already mapped by head.S.
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+ unsigned long vaddr;
+
+ zone_sizes_init();
+
+ /*
+ * Fixed mappings, only the page table structure has to be created -
+ * mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
+
+ /* Switch to the real shared_info page, and clear the dummy page. */
+ set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+#ifdef CONFIG_HIGHMEM
+#error
+ kmap_init();
+#endif
+}
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+ return 1;
+}
+
+#ifdef CONFIG_HIGHMEM
+void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+{
+ if (!page_is_ram(pfn)) {
+ SetPageReserved(page);
+ return;
+ }
+
+ if (bad_ppro && page_kills_ppro(pfn)) {
+ SetPageReserved(page);
+ return;
+ }
+
+ ClearPageReserved(page);
+ set_bit(PG_highmem, &page->flags);
+ atomic_set(&page->count, 1);
+ __free_page(page);
+ totalhigh_pages++;
+}
+#endif /* CONFIG_HIGHMEM */
+
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ highmem_start_page = mem_map + highstart_pfn;
+ max_mapnr = num_physpages = highend_pfn;
+ num_mappedpages = max_low_pfn;
+#else
+ max_mapnr = num_mappedpages = num_physpages = max_low_pfn;
+#endif
+}
+
+static int __init free_pages_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+#error Where is this supposed to be initialised?
+ int bad_ppro;
+#endif
+ int reservedpages, pfn;
+
+ /* this will put all low memory onto the freelists */
+ totalram_pages += free_all_bootmem();
+
+ reservedpages = 0;
+ for (pfn = 0; pfn < max_low_pfn; pfn++) {
+ /*
+ * Only count reserved RAM pages
+ */
+ if (page_is_ram(pfn) && PageReserved(mem_map+pfn))
+ reservedpages++;
+ }
+#ifdef CONFIG_HIGHMEM
+ for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+ one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
+ totalram_pages += totalhigh_pages;
+#endif
+ return reservedpages;
+}
+
+void __init mem_init(void)
+{
+ int codesize, reservedpages, datasize, initsize;
+
+ if (!mem_map)
+ BUG();
+
+ set_max_mapnr_init();
+
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+ /* clear the zero-page */
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+ reservedpages = free_pages_init();
+
+ codesize = (unsigned long) &_etext - (unsigned long) &_text;
+ datasize = (unsigned long) &_edata - (unsigned long) &_etext;
+ initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+ printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ max_mapnr << (PAGE_SHIFT-10),
+ codesize >> 10,
+ reservedpages << (PAGE_SHIFT-10),
+ datasize >> 10,
+ initsize >> 10,
+ (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+ );
+
+ boot_cpu_data.wp_works_ok = 1;
+}
+
+void free_initmem(void)
+{
+ unsigned long addr;
+
+ addr = (unsigned long)(&__init_begin);
+ for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ set_page_count(virt_to_page(addr), 1);
+ free_page(addr);
+ totalram_pages++;
+ }
+ printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ if (start < end)
+ printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ set_page_count(virt_to_page(start), 1);
+ free_page(start);
+ totalram_pages++;
+ }
+}
+#endif
+
+void si_meminfo(struct sysinfo *val)
+{
+ val->totalram = totalram_pages;
+ val->sharedram = 0;
+ val->freeram = nr_free_pages();
+ val->bufferram = atomic_read(&buffermem_pages);
+ val->totalhigh = totalhigh_pages;
+ val->freehigh = nr_free_highpages();
+ val->mem_unit = PAGE_SIZE;
+ return;
+}
+
+#if defined(CONFIG_X86_PAE)
+struct kmem_cache_s *pae_pgd_cachep;
+void __init pgtable_cache_init(void)
+{
+ /*
+ * PAE pgds must be 16-byte aligned:
+ */
+ pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0,
+ SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL);
+ if (!pae_pgd_cachep)
+ panic("init_pae(): Cannot alloc pae_pgd SLAB cache");
+}
+#endif /* CONFIG_X86_PAE */
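zone_sizes_init() above carves the physical page range into ZONE_DMA (below the 16MB ISA DMA limit), ZONE_NORMAL (the rest of lowmem) and, with CONFIG_HIGHMEM, ZONE_HIGHMEM. A stand-alone sketch of that arithmetic with made-up pfn values (the numbers and the main() driver are ours, not from the patch):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    unsigned long max_dma = (16UL << 20) >> PAGE_SHIFT;   /* 16MB ISA DMA limit, in pages */
    unsigned long low     = 224UL << (20 - PAGE_SHIFT);   /* pretend max_low_pfn  = 224MB */
    unsigned long high    = 512UL << (20 - PAGE_SHIFT);   /* pretend highend_pfn  = 512MB */
    unsigned long zones_size[3] = { 0, 0, 0 };            /* DMA, NORMAL, HIGHMEM */

    if (low < max_dma) {
        zones_size[0] = low;                              /* everything fits in ZONE_DMA  */
    } else {
        zones_size[0] = max_dma;
        zones_size[1] = low - max_dma;
        zones_size[2] = high - low;                       /* only populated with HIGHMEM  */
    }
    printf("DMA=%lu NORMAL=%lu HIGHMEM=%lu pages\n",
           zones_size[0], zones_size[1], zones_size[2]);
    return 0;
}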
diff --git a/xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c b/xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c
new file mode 100644
index 0000000000..eac5c6a63c
--- /dev/null
+++ b/xenolinux-2.4.23-sparse/arch/xeno/mm/ioremap.c
@@ -0,0 +1,226 @@
+/*
+ * arch/xeno/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ *
+ * Modifications for Xenolinux (c) 2003 Keir Fraser
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/mmu.h>
+
+#if defined(CONFIG_XENO_PRIV)
+
+#define direct_set_pte(_p, _v) queue_unchecked_mmu_update((_p), (_v).pte_low)
+#define __direct_pte(x) ((pte_t) { (x) } )
+#define __direct_mk_pte(page_nr,pgprot) \
+ __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+#define direct_mk_pte_phys(physpage, pgprot) \
+ __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+
+static inline void direct_remap_area_pte(pte_t *pte,
+ unsigned long address,
+ unsigned long size,
+ unsigned long machine_addr,
+ pgprot_t prot)
+{
+ unsigned long end;
+
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ if (address >= end)
+ BUG();
+ do {
+ if (!pte_none(*pte)) {
+ printk("direct_remap_area_pte: page already exists\n");
+ BUG();
+ }
+ direct_set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot)));
+ address += PAGE_SIZE;
+ machine_addr += PAGE_SIZE;
+ pte++;
+ } while (address && (address < end));
+}
+
+static inline int direct_remap_area_pmd(struct mm_struct *mm,
+ pmd_t *pmd,
+ unsigned long address,
+ unsigned long size,
+ unsigned long machine_addr,
+ pgprot_t prot)
+{
+ unsigned long end;
+
+ address &= ~PGDIR_MASK;
+ end = address + size;
+ if (end > PGDIR_SIZE)
+ end = PGDIR_SIZE;
+ machine_addr -= address;
+ if (address >= end)
+ BUG();
+ do {
+ pte_t * pte = pte_alloc(mm, pmd, address);
+ if (!pte)
+ return -ENOMEM;
+ direct_remap_area_pte(pte, address, end - address,
+ address + machine_addr, prot);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+ return 0;
+}
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot)
+{
+ int error = 0;
+ pgd_t * dir;
+ unsigned long end = address + size;
+
+ machine_addr -= address;
+ dir = pgd_offset(mm, address);
+ flush_cache_all();
+ if (address >= end)
+ BUG();
+ spin_lock(&mm->page_table_lock);
+ do {
+ pmd_t *pmd = pmd_alloc(mm, dir, address);
+ error = -ENOMEM;
+ if (!pmd)
+ break;
+ error = direct_remap_area_pmd(mm, pmd, address, end - address,
+ machine_addr + address, prot);
+ if (error)
+ break;
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ dir++;
+ } while (address && (address < end));
+ spin_unlock(&mm->page_table_lock);
+ flush_tlb_all();
+ return error;
+}
+
+#endif /* CONFIG_XENO_PRIV */
+
+
+/*
+ * Remap an arbitrary machine address space into the kernel virtual
+ * address space. Needed when a privileged instance of Xenolinux wants
+ * to access space outside its world directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void * __ioremap(unsigned long machine_addr,
+ unsigned long size,
+ unsigned long flags)
+{
+#if defined(CONFIG_XENO_PRIV)
+ void * addr;
+ struct vm_struct * area;
+ unsigned long offset, last_addr;
+ pgprot_t prot;
+
+ /* Only privileged Xenolinux can make unchecked pagetable updates. */
+ if ( !(start_info.flags & SIF_PRIVILEGED) )
+ return NULL;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = machine_addr + size - 1;
+ if (!size || last_addr < machine_addr)
+ return NULL;
+
+ /* Mappings have to be page-aligned */
+ offset = machine_addr & ~PAGE_MASK;
+ machine_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - machine_addr;
+
+ /* Ok, go for it */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ addr = area->addr;
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY |
+ _PAGE_ACCESSED | flags);
+ if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr),
+ machine_addr, size, prot)) {
+ vfree(addr);
+ return NULL;
+ }
+ return (void *) (offset + (char *)addr);
+#else
+ return NULL;
+#endif
+}
+
+void iounmap(void *addr)
+{
+ vfree((void *)((unsigned long)addr & PAGE_MASK));
+}
+
+/* Implementation of boot-time ioremap, for the purpose of providing access
+to the VGA console for privileged domains. Unlike boot-time ioremap on
+other architectures, ours is permanent and is not reclaimed when the vmalloc
+infrastructure is started. */
+
+void __init *bt_ioremap(unsigned long machine_addr, unsigned long size)
+{
+ unsigned long offset, last_addr;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = machine_addr + size - 1;
+ if (!size || last_addr < machine_addr)
+ return NULL;
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = machine_addr & ~PAGE_MASK;
+ machine_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - machine_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (nrpages > NR_FIX_BTMAPS)
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx = FIX_BTMAP_BEGIN;
+ while (nrpages > 0) {
+ set_fixmap(idx, machine_addr);
+ machine_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+
+ flush_tlb_all();
+
+ return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+
+#if 0 /* We don't support these functions. They shouldn't be required. */
+void __init bt_iounmap(void *addr, unsigned long size) {}
+#endif
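__ioremap() above accepts a machine address that need not be page-aligned: it records the offset within the first page, maps whole pages, and returns a pointer displaced by that offset. A stand-alone sketch of just that alignment arithmetic (the example address and size are ours):

#include <stdio.h>

#define PAGE_SIZE     4096UL
#define PAGE_MASK     (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
    unsigned long machine_addr = 0xfeedc123UL;            /* arbitrary, unaligned        */
    unsigned long size         = 0x300UL;
    unsigned long last_addr    = machine_addr + size - 1;

    unsigned long offset = machine_addr & ~PAGE_MASK;     /* byte offset into first page */
    machine_addr &= PAGE_MASK;                            /* map from the page boundary  */
    size = PAGE_ALIGN(last_addr + 1) - machine_addr;      /* round up to whole pages     */

    printf("map %#lx for %#lx bytes; caller gets mapping base + %#lx\n",
           machine_addr, size, offset);
    return 0;
}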