aboutsummaryrefslogtreecommitdiffstats
path: root/linux-2.6-xen-sparse/arch/i386/mm
diff options
context:
space:
mode:
Diffstat (limited to 'linux-2.6-xen-sparse/arch/i386/mm')
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/Makefile18
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c769
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c136
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c432
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/init-xen.c850
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c443
-rw-r--r--linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c727
7 files changed, 0 insertions, 3375 deletions
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/Makefile b/linux-2.6-xen-sparse/arch/i386/mm/Makefile
deleted file mode 100644
index 2b33b20038..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Makefile for the linux i386-specific parts of the memory manager.
-#
-
-obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o
-
-obj-$(CONFIG_NUMA) += discontig.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_HIGHMEM) += highmem.o
-obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
-
-ifdef CONFIG_XEN
-include $(srctree)/scripts/Makefile.xen
-
-obj-y += hypervisor.o
-
-obj-y := $(call cherrypickxen, $(obj-y))
-endif
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
deleted file mode 100644
index 78639201bc..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
+++ /dev/null
@@ -1,769 +0,0 @@
-/*
- * linux/arch/i386/mm/fault.c
- *
- * Copyright (C) 1995 Linus Torvalds
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/kprobes.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/kdebug.h>
-
-extern void die(const char *,struct pt_regs *,long);
-
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-int register_page_fault_notifier(struct notifier_block *nb)
-{
- vmalloc_sync_all();
- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
-
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-}
-
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- struct die_args args = {
- .regs = regs,
- .str = str,
- .err = err,
- .trapnr = trap,
- .signr = sig
- };
- return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
-}
-#else
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- return NOTIFY_DONE;
-}
-#endif
-
-
-/*
- * Unlock any spinlocks which will prevent us from getting the
- * message out
- */
-void bust_spinlocks(int yes)
-{
- int loglevel_save = console_loglevel;
-
- if (yes) {
- oops_in_progress = 1;
- return;
- }
-#ifdef CONFIG_VT
- unblank_screen();
-#endif
- oops_in_progress = 0;
- /*
- * OK, the message is on the console. Now we call printk()
- * without oops_in_progress set so that printk will give klogd
- * a poke. Hold onto your hats...
- */
- console_loglevel = 15; /* NMI oopser may have shut the console up */
- printk(" ");
- console_loglevel = loglevel_save;
-}
-
-/*
- * Return EIP plus the CS segment base. The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
- *
- * The segment is checked, because it might have been changed by another
- * task between the original faulting instruction and here.
- *
- * If CS is no longer a valid code segment, or if EIP is beyond the
- * limit, or if it is a kernel address when CS is not a kernel segment,
- * then the returned value will be greater than *eip_limit.
- *
- * This is slow, but is very rarely executed.
- */
-static inline unsigned long get_segment_eip(struct pt_regs *regs,
- unsigned long *eip_limit)
-{
- unsigned long eip = regs->eip;
- unsigned seg = regs->xcs & 0xffff;
- u32 seg_ar, seg_limit, base, *desc;
-
- /* Unlikely, but must come before segment checks. */
- if (unlikely(regs->eflags & VM_MASK)) {
- base = seg << 4;
- *eip_limit = base + 0xffff;
- return base + (eip & 0xffff);
- }
-
- /* The standard kernel/user address space limit. */
- *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
-
- /* By far the most common cases. */
- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
- return eip;
-
- /* Check the segment exists, is within the current LDT/GDT size,
- that kernel/user (ring 0..3) has the appropriate privilege,
- that it's a code segment, and get the limit. */
- __asm__ ("larl %3,%0; lsll %3,%1"
- : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
- if ((~seg_ar & 0x9800) || eip > seg_limit) {
- *eip_limit = 0;
- return 1; /* So that returned eip > *eip_limit. */
- }
-
- /* Get the GDT/LDT descriptor base.
- When you look for races in this code remember that
- LDT and other horrors are only used in user space. */
- if (seg & (1<<2)) {
- /* Must lock the LDT while reading it. */
- down(&current->mm->context.sem);
- desc = current->mm->context.ldt;
- desc = (void *)desc + (seg & ~7);
- } else {
- /* Must disable preemption while reading the GDT. */
- desc = (u32 *)get_cpu_gdt_table(get_cpu());
- desc = (void *)desc + (seg & ~7);
- }
-
- /* Decode the code segment base from the descriptor */
- base = get_desc_base((unsigned long *)desc);
-
- if (seg & (1<<2)) {
- up(&current->mm->context.sem);
- } else
- put_cpu();
-
- /* Adjust EIP and segment limit, and clamp at the kernel limit.
- It's legitimate for segments to wrap at 0xffffffff. */
- seg_limit += base;
- if (seg_limit < *eip_limit && seg_limit >= base)
- *eip_limit = seg_limit;
- return eip + base;
-}
-
-/*
- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
- * Check that here and ignore it.
- */
-static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long limit;
- unsigned long instr = get_segment_eip (regs, &limit);
- int scan_more = 1;
- int prefetch = 0;
- int i;
-
- for (i = 0; scan_more && i < 15; i++) {
- unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
-
- if (instr > limit)
- break;
- if (__get_user(opcode, (unsigned char __user *) instr))
- break;
-
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
- instr++;
-
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
- scan_more = ((instr_lo & 7) == 0x6);
- break;
-
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, and 0xF3 are valid prefixes */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
- if (instr > limit)
- break;
- if (__get_user(opcode, (unsigned char __user *) instr))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
- break;
- default:
- scan_more = 0;
- break;
- }
- }
- return prefetch;
-}
-
-static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
-{
- if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 >= 6)) {
- /* Catch an obscure case of prefetch inside an NX page. */
- if (nx_enabled && (error_code & 16))
- return 0;
- return __is_prefetch(regs, addr);
- }
- return 0;
-}
-
-static noinline void force_sig_info_fault(int si_signo, int si_code,
- unsigned long address, struct task_struct *tsk)
-{
- siginfo_t info;
-
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *)address;
- force_sig_info(si_signo, &info, tsk);
-}
-
-fastcall void do_invalid_op(struct pt_regs *, unsigned long);
-
-#ifdef CONFIG_X86_PAE
-static void dump_fault_path(unsigned long address)
-{
- unsigned long *p, page;
- unsigned long mfn;
-
- page = read_cr3();
- p = (unsigned long *)__va(page);
- p += (address >> 30) * 2;
- printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
- if (p[0] & 1) {
- mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
- page = mfn_to_pfn(mfn) << PAGE_SHIFT;
- p = (unsigned long *)__va(page);
- address &= 0x3fffffff;
- p += (address >> 21) * 2;
- printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n",
- page, p[1], p[0]);
- mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
-#ifdef CONFIG_HIGHPTE
- if (mfn_to_pfn(mfn) >= highstart_pfn)
- return;
-#endif
- if (p[0] & 1) {
- page = mfn_to_pfn(mfn) << PAGE_SHIFT;
- p = (unsigned long *) __va(page);
- address &= 0x001fffff;
- p += (address >> 12) * 2;
- printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n",
- page, p[1], p[0]);
- }
- }
-}
-#else
-static void dump_fault_path(unsigned long address)
-{
- unsigned long page;
-
- page = read_cr3();
- page = ((unsigned long *) __va(page))[address >> 22];
- if (oops_may_print())
- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
- machine_to_phys(page));
- /*
- * We must not directly access the pte in the highpte
- * case if the page table is located in highmem.
- * And lets rather not kmap-atomic the pte, just in case
- * it's allocated already.
- */
-#ifdef CONFIG_HIGHPTE
- if ((page >> PAGE_SHIFT) >= highstart_pfn)
- return;
-#endif
- if ((page & 1) && oops_may_print()) {
- page &= PAGE_MASK;
- address &= 0x003ff000;
- page = machine_to_phys(page);
- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
- printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
- machine_to_phys(page));
- }
-}
-#endif
-
-static int spurious_fault(struct pt_regs *regs,
- unsigned long address,
- unsigned long error_code)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /* Reserved-bit violation or user access to kernel space? */
- if (error_code & 0x0c)
- return 0;
-
- pgd = init_mm.pgd + pgd_index(address);
- if (!pgd_present(*pgd))
- return 0;
-
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return 0;
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- return 0;
-
- pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
- return 0;
- if ((error_code & 0x02) && !pte_write(*pte))
- return 0;
-#ifdef CONFIG_X86_PAE
- if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX))
- return 0;
-#endif
-
- return 1;
-}
-
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-{
- unsigned index = pgd_index(address);
- pgd_t *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
-
- pgd += index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- return NULL;
-
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
-
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- return NULL;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
- if (!pmd_present(*pmd))
-#ifndef CONFIG_XEN
- set_pmd(pmd, *pmd_k);
-#else
- /*
- * When running on Xen we must launder *pmd_k through
- * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
- */
- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
-#endif
- else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
- return pmd_k;
-}
-
-/*
- * Handle a fault on the vmalloc or module mapping area
- *
- * This assumes no large pages in there.
- */
-static inline int vmalloc_fault(unsigned long address)
-{
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- pgd_paddr = read_cr3();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
- return 0;
-}
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- *
- * error_code:
- * bit 0 == 0 means no page found, 1 means protection fault
- * bit 1 == 0 means read, 1 means write
- * bit 2 == 0 means kernel, 1 means user-mode
- * bit 3 == 1 means use of reserved bit detected
- * bit 4 == 1 means fault was an instruction fetch
- */
-fastcall void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long error_code)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- unsigned long address;
- int write, si_code;
-
- /* get the address */
- address = read_cr2();
-
- /* Set the "privileged fault" bit to something sane. */
- error_code &= ~4;
- error_code |= (regs->xcs & 2) << 1;
- if (regs->eflags & X86_EFLAGS_VM)
- error_code |= 4;
-
- tsk = current;
-
- si_code = SEGV_MAPERR;
-
- /*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 9) == 0.
- */
- if (unlikely(address >= TASK_SIZE)) {
-#ifdef CONFIG_XEN
- /* Faults in hypervisor area can never be patched up. */
- if (address >= hypervisor_virt_start)
- goto bad_area_nosemaphore;
-#endif
- if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
- return;
- /* Can take a spurious fault if mapping changes R/O -> R/W. */
- if (spurious_fault(regs, address, error_code))
- return;
- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
-
- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
-
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
- fault has been handled. */
- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
- local_irq_enable();
-
- mm = tsk->mm;
-
- /*
- * If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault..
- */
- if (in_atomic() || !mm)
- goto bad_area_nosemaphore;
-
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunatly, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibilty of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
- */
- if (!down_read_trylock(&mm->mmap_sem)) {
- if ((error_code & 4) == 0 &&
- !search_exception_tables(regs->eip))
- goto bad_area_nosemaphore;
- down_read(&mm->mmap_sem);
- }
-
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (error_code & 4) {
- /*
- * Accessing the stack below %esp is always a bug.
- * The large cushion allows instructions like enter
- * and pusha to work. ("enter $65535,$31" pushes
- * 32 pointers and then decrements %esp by 65535.)
- */
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & 3) {
- default: /* 3: write, present */
-#ifdef TEST_VERIFY_AREA
- if (regs->cs == GET_KERNEL_CS())
- printk("WP fault at %08lx\n", regs->eip);
-#endif
- /* fall through */
- case 2: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case 1: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
- goto bad_area;
- }
-
- survive:
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
- switch (handle_mm_fault(mm, vma, address, write)) {
- case VM_FAULT_MINOR:
- tsk->min_flt++;
- break;
- case VM_FAULT_MAJOR:
- tsk->maj_flt++;
- break;
- case VM_FAULT_SIGBUS:
- goto do_sigbus;
- case VM_FAULT_OOM:
- goto out_of_memory;
- default:
- BUG();
- }
-
- /*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
- if (regs->eflags & VM_MASK) {
- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
- if (bit < 32)
- tsk->thread.screen_bitmap |= 1 << bit;
- }
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & 4) {
- /*
- * Valid to do another page fault here because this one came
- * from user space.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
- return;
- }
-
-#ifdef CONFIG_X86_F00F_BUG
- /*
- * Pentium F0 0F C7 C8 bug workaround.
- */
- if (boot_cpu_data.f00f_bug) {
- unsigned long nr;
-
- nr = (address - idt_descr.address) >> 3;
-
- if (nr == 6) {
- do_invalid_op(regs, 0);
- return;
- }
- }
-#endif
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- /*
- * Valid to do another page fault here, because if this fault
- * had been triggered by is_prefetch fixup_exception would have
- * handled it.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-
- bust_spinlocks(1);
-
- if (oops_may_print()) {
- #ifdef CONFIG_X86_PAE
- if (error_code & 16) {
- pte_t *pte = lookup_address(address);
-
- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
- printk(KERN_CRIT "kernel tried to execute "
- "NX-protected page - exploit attempt? "
- "(uid: %d)\n", current->uid);
- }
- #endif
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "BUG: unable to handle kernel NULL "
- "pointer dereference");
- else
- printk(KERN_ALERT "BUG: unable to handle kernel paging"
- " request");
- printk(" at virtual address %08lx\n",address);
- printk(KERN_ALERT " printing eip:\n");
- printk("%08lx\n", regs->eip);
- }
- dump_fault_path(address);
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
- die("Oops", regs, error_code);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (tsk->pid == 1) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & 4)
- do_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & 4))
- goto no_context;
-
- /* User space => ok to do another page fault */
- if (is_prefetch(regs, address, error_code))
- return;
-
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-}
-
-#if !HAVE_SHARED_KERNEL_PMD
-void vmalloc_sync_all(void)
-{
- /*
- * Note that races in the updates of insync and start aren't
- * problematic: insync can only get set bits added, and updates to
- * start are only improving performance (without affecting correctness
- * if undone).
- */
- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
- static unsigned long start = TASK_SIZE;
- unsigned long address;
-
- BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
- for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
- if (!test_bit(pgd_index(address), insync)) {
- unsigned long flags;
- struct page *page;
-
- spin_lock_irqsave(&pgd_lock, flags);
- for (page = pgd_list; page; page =
- (struct page *)page->index)
- if (!vmalloc_sync_one(page_address(page),
- address)) {
- BUG_ON(page != pgd_list);
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- if (!page)
- set_bit(pgd_index(address), insync);
- }
- if (address == start && test_bit(pgd_index(address), insync))
- start = address + PGDIR_SIZE;
- }
-}
-#endif
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c
deleted file mode 100644
index 20838cce53..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <linux/highmem.h>
-#include <linux/module.h>
-
-void *kmap(struct page *page)
-{
- might_sleep();
- if (!PageHighMem(page))
- return page_address(page);
- return kmap_high(page);
-}
-
-void kunmap(struct page *page)
-{
- if (in_interrupt())
- BUG();
- if (!PageHighMem(page))
- return;
- kunmap_high(page);
-}
-
-/*
- * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
- * no global lock is needed and because the kmap code must perform a global TLB
- * invalidation when the kmap pool wraps.
- *
- * However when holding an atomic kmap is is not legal to sleep, so atomic
- * kmaps are appropriate for short, tight code paths only.
- */
-static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
-{
- enum fixed_addresses idx;
- unsigned long vaddr;
-
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
- inc_preempt_count();
- if (!PageHighMem(page))
- return page_address(page);
-
- idx = type + KM_TYPE_NR*smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
- if (!pte_none(*(kmap_pte-idx)))
- BUG();
-#endif
- set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
-
- return (void*) vaddr;
-}
-
-void *kmap_atomic(struct page *page, enum km_type type)
-{
- return __kmap_atomic(page, type, kmap_prot);
-}
-
-/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
-void *kmap_atomic_pte(struct page *page, enum km_type type)
-{
- return __kmap_atomic(page, type,
- test_bit(PG_pinned, &page->flags)
- ? PAGE_KERNEL_RO : kmap_prot);
-}
-
-void kunmap_atomic(void *kvaddr, enum km_type type)
-{
-#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
- unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
-
- if (vaddr < FIXADDR_START) { // FIXME
- dec_preempt_count();
- preempt_check_resched();
- return;
- }
-#endif
-
-#if defined(CONFIG_DEBUG_HIGHMEM)
- if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
- BUG();
-
- /*
- * force other mappings to Oops if they'll try to access
- * this pte without first remap it
- */
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
- __flush_tlb_one(vaddr);
-#elif defined(CONFIG_XEN)
- /*
- * We must ensure there are no dangling pagetable references when
- * returning memory to Xen (decrease_reservation).
- * XXX TODO: We could make this faster by only zapping when
- * kmap_flush_unused is called but that is trickier and more invasive.
- */
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
-#endif
-
- dec_preempt_count();
- preempt_check_resched();
-}
-
-/* This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
-{
- enum fixed_addresses idx;
- unsigned long vaddr;
-
- inc_preempt_count();
-
- idx = type + KM_TYPE_NR*smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
- __flush_tlb_one(vaddr);
-
- return (void*) vaddr;
-}
-
-struct page *kmap_atomic_to_page(void *ptr)
-{
- unsigned long idx, vaddr = (unsigned long)ptr;
- pte_t *pte;
-
- if (vaddr < FIXADDR_START)
- return virt_to_page(ptr);
-
- idx = virt_to_fix(vaddr);
- pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
- return pte_page(*pte);
-}
-
-EXPORT_SYMBOL(kmap);
-EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_pte);
-EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_to_page);
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
deleted file mode 100644
index 3222b9d5ae..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/******************************************************************************
- * mm/hypervisor.c
- *
- * Update page tables via the hypervisor.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/hypervisor.h>
-#include <xen/balloon.h>
-#include <xen/features.h>
-#include <xen/interface/memory.h>
-#include <linux/module.h>
-#include <linux/percpu.h>
-#include <asm/tlbflush.h>
-
-void xen_l1_entry_update(pte_t *ptr, pte_t val)
-{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = __pte_val(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
-{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = __pmd_val(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
-}
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-void xen_l3_entry_update(pud_t *ptr, pud_t val)
-{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = __pud_val(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
-{
- mmu_update_t u;
- u.ptr = virt_to_machine(ptr);
- u.val = __pgd_val(val);
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
-}
-#endif /* CONFIG_X86_64 */
-
-void xen_pt_switch(unsigned long ptr)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_NEW_BASEPTR;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_new_user_pt(unsigned long ptr)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_NEW_USER_BASEPTR;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_tlb_flush(void)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-EXPORT_SYMBOL(xen_tlb_flush);
-
-void xen_invlpg(unsigned long ptr)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_LOCAL;
- op.arg1.linear_addr = ptr & PAGE_MASK;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-EXPORT_SYMBOL(xen_invlpg);
-
-#ifdef CONFIG_SMP
-
-void xen_tlb_flush_all(void)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_ALL;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_tlb_flush_mask(cpumask_t *mask)
-{
- struct mmuext_op op;
- if ( cpus_empty(*mask) )
- return;
- op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_invlpg_all(unsigned long ptr)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_ALL;
- op.arg1.linear_addr = ptr & PAGE_MASK;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
-{
- struct mmuext_op op;
- if ( cpus_empty(*mask) )
- return;
- op.cmd = MMUEXT_INVLPG_MULTI;
- op.arg1.linear_addr = ptr & PAGE_MASK;
- set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-#endif /* CONFIG_SMP */
-
-void xen_pgd_pin(unsigned long ptr)
-{
- struct mmuext_op op;
-#ifdef CONFIG_X86_64
- op.cmd = MMUEXT_PIN_L4_TABLE;
-#elif defined(CONFIG_X86_PAE)
- op.cmd = MMUEXT_PIN_L3_TABLE;
-#else
- op.cmd = MMUEXT_PIN_L2_TABLE;
-#endif
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_pgd_unpin(unsigned long ptr)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_set_ldt(unsigned long ptr, unsigned long len)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_SET_LDT;
- op.arg1.linear_addr = ptr;
- op.arg2.nr_ents = len;
- BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-static void contiguous_bitmap_set(
- unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] |=
- ((1UL<<end_off)-1) & -(1UL<<start_off);
- } else {
- contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
- while ( ++curr_idx < end_idx )
- contiguous_bitmap[curr_idx] = ~0UL;
- contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
- }
-}
-
-static void contiguous_bitmap_clear(
- unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] &=
- -(1UL<<end_off) | ((1UL<<start_off)-1);
- } else {
- contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
- while ( ++curr_idx != end_idx )
- contiguous_bitmap[curr_idx] = 0;
- contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
- }
-}
-
-/* Protected by balloon_lock. */
-#define MAX_CONTIG_ORDER 9 /* 2MB */
-static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];
-
-/* Ensure multi-page extents are contiguous in machine memory. */
-int xen_create_contiguous_region(
- unsigned long vstart, unsigned int order, unsigned int address_bits)
-{
- unsigned long *in_frames = discontig_frames, out_frame;
- unsigned long frame, i, flags;
- long rc;
- int success;
- struct xen_memory_exchange exchange = {
- .in = {
- .nr_extents = 1UL << order,
- .extent_order = 0,
- .domid = DOMID_SELF
- },
- .out = {
- .nr_extents = 1,
- .extent_order = order,
- .address_bits = address_bits,
- .domid = DOMID_SELF
- }
- };
-
- /*
- * Currently an auto-translated guest will not perform I/O, nor will
- * it require PAE page directories below 4GB. Therefore any calls to
- * this function are redundant and can be ignored.
- */
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
- if (unlikely(order > MAX_CONTIG_ORDER))
- return -ENOMEM;
-
- set_xen_guest_handle(exchange.in.extent_start, in_frames);
- set_xen_guest_handle(exchange.out.extent_start, &out_frame);
-
- scrub_pages(vstart, 1 << order);
-
- balloon_lock(flags);
-
- /* 1. Zap current PTEs, remembering MFNs. */
- for (i = 0; i < (1UL<<order); i++) {
- in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
- MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
- __pte_ma(0), 0);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
- INVALID_P2M_ENTRY);
- }
- if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
- BUG();
-
- /* 2. Get a new contiguous memory extent. */
- out_frame = __pa(vstart) >> PAGE_SHIFT;
- rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
- success = (exchange.nr_exchanged == (1UL << order));
- BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
- BUG_ON(success && (rc != 0));
-#if CONFIG_XEN_COMPAT <= 0x030002
- if (unlikely(rc == -ENOSYS)) {
- /* Compatibility when XENMEM_exchange is unsupported. */
- if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &exchange.in) != (1UL << order))
- BUG();
- success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &exchange.out) == 1);
- if (!success) {
- /* Couldn't get special memory: fall back to normal. */
- for (i = 0; i < (1UL<<order); i++)
- in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
- if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &exchange.in) != (1UL<<order))
- BUG();
- }
- }
-#endif
-
- /* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1UL<<order); i++) {
- frame = success ? (out_frame + i) : in_frames[i];
- MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
- pfn_pte_ma(frame, PAGE_KERNEL), 0);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
- }
-
- cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
- ? UVMF_TLB_FLUSH|UVMF_ALL
- : UVMF_INVLPG|UVMF_ALL;
- if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
- BUG();
-
- if (success)
- contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
- 1UL << order);
-
- balloon_unlock(flags);
-
- return success ? 0 : -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
-
-void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
-{
- unsigned long *out_frames = discontig_frames, in_frame;
- unsigned long frame, i, flags;
- long rc;
- int success;
- struct xen_memory_exchange exchange = {
- .in = {
- .nr_extents = 1,
- .extent_order = order,
- .domid = DOMID_SELF
- },
- .out = {
- .nr_extents = 1UL << order,
- .extent_order = 0,
- .domid = DOMID_SELF
- }
- };
-
- if (xen_feature(XENFEAT_auto_translated_physmap) ||
- !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
- return;
-
- if (unlikely(order > MAX_CONTIG_ORDER))
- return;
-
- set_xen_guest_handle(exchange.in.extent_start, &in_frame);
- set_xen_guest_handle(exchange.out.extent_start, out_frames);
-
- scrub_pages(vstart, 1 << order);
-
- balloon_lock(flags);
-
- contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
-
- /* 1. Find start MFN of contiguous extent. */
- in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);
-
- /* 2. Zap current PTEs. */
- for (i = 0; i < (1UL<<order); i++) {
- MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
- __pte_ma(0), 0);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
- INVALID_P2M_ENTRY);
- out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
- }
- if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
- BUG();
-
- /* 3. Do the exchange for non-contiguous MFNs. */
- rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
- success = (exchange.nr_exchanged == 1);
- BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
- BUG_ON(success && (rc != 0));
-#if CONFIG_XEN_COMPAT <= 0x030002
- if (unlikely(rc == -ENOSYS)) {
- /* Compatibility when XENMEM_exchange is unsupported. */
- if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &exchange.in) != 1)
- BUG();
- if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
- &exchange.out) != (1UL << order))
- BUG();
- success = 1;
- }
-#endif
-
- /* 4. Map new pages in place of old pages. */
- for (i = 0; i < (1UL<<order); i++) {
- frame = success ? out_frames[i] : (in_frame + i);
- MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
- pfn_pte_ma(frame, PAGE_KERNEL), 0);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
- }
-
- cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
- ? UVMF_TLB_FLUSH|UVMF_ALL
- : UVMF_INVLPG|UVMF_ALL;
- if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
- BUG();
-
- balloon_unlock(flags);
-}
-EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
-
-#ifdef __i386__
-int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
-{
- __u32 *lp = (__u32 *)((char *)ldt + entry * 8);
- maddr_t mach_lp = arbitrary_virt_to_machine(lp);
- return HYPERVISOR_update_descriptor(
- mach_lp, (u64)entry_a | ((u64)entry_b<<32));
-}
-#endif
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
deleted file mode 100644
index 9a04cb6b1e..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
+++ /dev/null
@@ -1,850 +0,0 @@
-/*
- * linux/arch/i386/mm/init.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- */
-
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/swap.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/poison.h>
-#include <linux/bootmem.h>
-#include <linux/slab.h>
-#include <linux/proc_fs.h>
-#include <linux/efi.h>
-#include <linux/memory_hotplug.h>
-#include <linux/initrd.h>
-#include <linux/cpumask.h>
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <asm/fixmap.h>
-#include <asm/e820.h>
-#include <asm/apic.h>
-#include <asm/tlb.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/hypervisor.h>
-#include <asm/swiotlb.h>
-
-extern unsigned long *contiguous_bitmap;
-
-unsigned int __VMALLOC_RESERVE = 128 << 20;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-unsigned long highstart_pfn, highend_pfn;
-
-static int noinline do_test_wp_bit(void);
-
-/*
- * Creates a middle page table and puts a pointer to it in the
- * given global directory entry. This only returns the gd entry
- * in non-PAE compilation mode, since the middle layer is folded.
- */
-static pmd_t * __init one_md_table_init(pgd_t *pgd)
-{
- pud_t *pud;
- pmd_t *pmd_table;
-
-#ifdef CONFIG_X86_PAE
- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
- if (pmd_table != pmd_offset(pud, 0))
- BUG();
-#else
- pud = pud_offset(pgd, 0);
- pmd_table = pmd_offset(pud, 0);
-#endif
-
- return pmd_table;
-}
-
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static pte_t * __init one_page_table_init(pmd_t *pmd)
-{
- if (pmd_none(*pmd)) {
- pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- make_lowmem_page_readonly(page_table,
- XENFEAT_writable_page_tables);
- set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
- if (page_table != pte_offset_kernel(pmd, 0))
- BUG();
-
- return page_table;
- }
-
- return pte_offset_kernel(pmd, 0);
-}
-
-/*
- * This function initializes a certain range of kernel virtual memory
- * with new bootmem page tables, everywhere page tables are missing in
- * the given range.
- */
-
-/*
- * NOTE: The pagetables are allocated contiguous on the physical space
- * so we can cache the place of the first one and move around without
- * checking the pgd every time.
- */
-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- int pgd_idx, pmd_idx;
- unsigned long vaddr;
-
- vaddr = start;
- pgd_idx = pgd_index(vaddr);
- pmd_idx = pmd_index(vaddr);
- pgd = pgd_base + pgd_idx;
-
- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- if (pgd_none(*pgd))
- one_md_table_init(pgd);
- pud = pud_offset(pgd, vaddr);
- pmd = pmd_offset(pud, vaddr);
- for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
- one_page_table_init(pmd);
-
- vaddr += PMD_SIZE;
- }
- pmd_idx = 0;
- }
-}
-
-static inline int is_kernel_text(unsigned long addr)
-{
- if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
- return 1;
- return 0;
-}
-
-/*
- * This maps the physical memory to kernel virtual address space, a total
- * of max_low_pfn pages, by creating page tables starting from address
- * PAGE_OFFSET.
- */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
-{
- unsigned long pfn;
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- int pgd_idx, pmd_idx, pte_ofs;
-
- unsigned long max_ram_pfn = xen_start_info->nr_pages;
- if (max_ram_pfn > max_low_pfn)
- max_ram_pfn = max_low_pfn;
-
- pgd_idx = pgd_index(PAGE_OFFSET);
- pgd = pgd_base + pgd_idx;
- pfn = 0;
- pmd_idx = pmd_index(PAGE_OFFSET);
- pte_ofs = pte_index(PAGE_OFFSET);
-
- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
-#ifdef CONFIG_XEN
- /*
- * Native linux hasn't PAE-paging enabled yet at this
- * point. When running as xen domain we are in PAE
- * mode already, thus we can't simply hook a empty
- * pmd. That would kill the mappings we are currently
- * using ...
- */
- pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
-#else
- pmd = one_md_table_init(pgd);
-#endif
- if (pfn >= max_low_pfn)
- continue;
- pmd += pmd_idx;
- for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
- unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
- if (address >= hypervisor_virt_start)
- continue;
-
- /* Map with big pages if possible, otherwise create normal page tables. */
- if (cpu_has_pse) {
- unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
- if (is_kernel_text(address) || is_kernel_text(address2))
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
- else
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
- pfn += PTRS_PER_PTE;
- } else {
- pte = one_page_table_init(pmd);
-
- pte += pte_ofs;
- for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
- /* XEN: Only map initial RAM allocation. */
- if ((pfn >= max_ram_pfn) || pte_present(*pte))
- continue;
- if (is_kernel_text(address))
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
- else
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
- }
- pte_ofs = 0;
- }
- }
- pmd_idx = 0;
- }
-}
-
-#ifndef CONFIG_XEN
-
-static inline int page_kills_ppro(unsigned long pagenr)
-{
- if (pagenr >= 0x70000 && pagenr <= 0x7003F)
- return 1;
- return 0;
-}
-
-#else
-
-#define page_kills_ppro(p) 0
-
-#endif
-
-extern int is_available_memory(efi_memory_desc_t *);
-
-int page_is_ram(unsigned long pagenr)
-{
- int i;
- unsigned long addr, end;
-
- if (efi_enabled) {
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if (!is_available_memory(md))
- continue;
- addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
-
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
- }
-
- for (i = 0; i < e820.nr_map; i++) {
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-#define kmap_get_fixmap_pte(vaddr) \
- pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
-
-static void __init kmap_init(void)
-{
- unsigned long kmap_vstart;
-
- /* cache the first kmap pte */
- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
- kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
- kmap_prot = PAGE_KERNEL;
-}
-
-static void __init permanent_kmaps_init(pgd_t *pgd_base)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long vaddr;
-
- vaddr = PKMAP_BASE;
- page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
- pmd = pmd_offset(pud, vaddr);
- pte = pte_offset_kernel(pmd, vaddr);
- pkmap_page_table = pte;
-}
-
-static void __meminit free_new_highpage(struct page *page, int pfn)
-{
- init_page_count(page);
- if (pfn < xen_start_info->nr_pages)
- __free_page(page);
- totalhigh_pages++;
-}
-
-void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
-{
- if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
- ClearPageReserved(page);
- free_new_highpage(page, pfn);
- } else
- SetPageReserved(page);
-}
-
-static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
- free_new_highpage(page, pfn);
- totalram_pages++;
-#ifdef CONFIG_FLATMEM
- max_mapnr = max(pfn, max_mapnr);
-#endif
- num_physpages++;
- return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM
- */
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
-
-#ifdef CONFIG_NUMA
-extern void set_highmem_pages_init(int);
-#else
-static void __init set_highmem_pages_init(int bad_ppro)
-{
- int pfn;
- for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
- add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
- totalram_pages += totalhigh_pages;
-}
-#endif /* CONFIG_FLATMEM */
-
-#else
-#define kmap_init() do { } while (0)
-#define permanent_kmaps_init(pgd_base) do { } while (0)
-#define set_highmem_pages_init(bad_ppro) do { } while (0)
-#endif /* CONFIG_HIGHMEM */
-
-unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
-EXPORT_SYMBOL(__PAGE_KERNEL);
-unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
-
-#ifdef CONFIG_NUMA
-extern void __init remap_numa_kva(void);
-#else
-#define remap_numa_kva() do {} while (0)
-#endif
-
-pgd_t *swapper_pg_dir;
-
-static void __init pagetable_init (void)
-{
- unsigned long vaddr;
- pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
-
- swapper_pg_dir = pgd_base;
- init_mm.pgd = pgd_base;
-
- /* Enable PSE if available */
- if (cpu_has_pse) {
- set_in_cr4(X86_CR4_PSE);
- }
-
- /* Enable PGE if available */
- if (cpu_has_pge) {
- set_in_cr4(X86_CR4_PGE);
- __PAGE_KERNEL |= _PAGE_GLOBAL;
- __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
- }
-
- kernel_physical_mapping_init(pgd_base);
- remap_numa_kva();
-
- /*
- * Fixed mappings, only the page table structure has to be
- * created - mappings will be set by set_fixmap():
- */
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);
-
- permanent_kmaps_init(pgd_base);
-}
-
-#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
-/*
- * Swap suspend & friends need this for resume because things like the intel-agp
- * driver might have split up a kernel 4MB mapping.
- */
-char __nosavedata swsusp_pg_dir[PAGE_SIZE]
- __attribute__ ((aligned (PAGE_SIZE)));
-
-static inline void save_pg_dir(void)
-{
- memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
-}
-#else
-static inline void save_pg_dir(void)
-{
-}
-#endif
-
-void zap_low_mappings (void)
-{
- int i;
-
- save_pg_dir();
-
- /*
- * Zap initial low-memory mappings.
- *
- * Note that "pgd_clear()" doesn't do it for
- * us, because pgd_clear() is a no-op on i386.
- */
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
- set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
- flush_tlb_all();
-}
-
-static int disable_nx __initdata = 0;
-u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
-EXPORT_SYMBOL(__supported_pte_mask);
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on Enable
- * off Disable
- */
-void __init noexec_setup(const char *str)
-{
- if (!strncmp(str, "on",2) && cpu_has_nx) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- } else if (!strncmp(str,"off",3)) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
-}
-
-int nx_enabled = 0;
-#ifdef CONFIG_X86_PAE
-
-static void __init set_nx(void)
-{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
- }
- }
-}
-
-/*
- * Enables/disables executability of a given kernel page and
- * returns the previous setting.
- */
-int __init set_kernel_exec(unsigned long vaddr, int enable)
-{
- pte_t *pte;
- int ret = 1;
-
- if (!nx_enabled)
- goto out;
-
- pte = lookup_address(vaddr);
- BUG_ON(!pte);
-
- if (!pte_exec_kernel(*pte))
- ret = 0;
-
- if (enable)
- pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
- else
- pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
- __flush_tlb_all();
-out:
- return ret;
-}
-
-#endif
-
-/*
- * paging_init() sets up the page tables - note that the first 8MB are
- * already mapped by head.S.
- *
- * This routines also unmaps the page at virtual kernel address 0, so
- * that we can trap those pesky NULL-reference errors in the kernel.
- */
-void __init paging_init(void)
-{
- int i;
-
-#ifdef CONFIG_X86_PAE
- set_nx();
- if (nx_enabled)
- printk("NX (Execute Disable) protection: active\n");
-#endif
-
- pagetable_init();
-
-#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
- /*
- * We will bail out later - printk doesn't work right now so
- * the user would just see a hanging kernel.
- * when running as xen domain we are already in PAE mode at
- * this point.
- */
- if (cpu_has_pae)
- set_in_cr4(X86_CR4_PAE);
-#endif
- __flush_tlb_all();
-
- kmap_init();
-
- /* Switch to the real shared_info page, and clear the
- * dummy page. */
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
-
- /* Setup mapping of lower 1st MB */
- for (i = 0; i < NR_FIX_ISAMAPS; i++)
- if (is_initial_xendomain())
- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
- else
- __set_fixmap(FIX_ISAMAP_BEGIN - i,
- virt_to_machine(empty_zero_page),
- PAGE_KERNEL_RO);
-}
-
-/*
- * Test if the WP bit works in supervisor mode. It isn't supported on 386's
- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
- * used to involve black magic jumps to work around some nasty CPU bugs,
- * but fortunately the switch to using exceptions got rid of all that.
- */
-
-static void __init test_wp_bit(void)
-{
- printk("Checking if this processor honours the WP bit even in supervisor mode... ");
-
- /* Any page-aligned address will do, the test is non-destructive */
- __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
- boot_cpu_data.wp_works_ok = do_test_wp_bit();
- clear_fixmap(FIX_WP_TEST);
-
- if (!boot_cpu_data.wp_works_ok) {
- printk("No.\n");
-#ifdef CONFIG_X86_WP_WORKS_OK
- panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
-#endif
- } else {
- printk("Ok.\n");
- }
-}
-
-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
- num_physpages = highend_pfn;
-#else
- num_physpages = max_low_pfn;
-#endif
-#ifdef CONFIG_FLATMEM
- max_mapnr = num_physpages;
-#endif
-}
-
-static struct kcore_list kcore_mem, kcore_vmalloc;
-
-void __init mem_init(void)
-{
- extern int ppro_with_ram_bug(void);
- int codesize, reservedpages, datasize, initsize;
- int tmp;
- int bad_ppro;
- unsigned long pfn;
-
- contiguous_bitmap = alloc_bootmem_low_pages(
- (max_low_pfn + 2*BITS_PER_LONG) >> 3);
- BUG_ON(!contiguous_bitmap);
- memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3);
-
-#if defined(CONFIG_SWIOTLB)
- swiotlb_init();
-#endif
-
-#ifdef CONFIG_FLATMEM
- if (!mem_map)
- BUG();
-#endif
-
- bad_ppro = ppro_with_ram_bug();
-
-#ifdef CONFIG_HIGHMEM
- /* check that fixmap and pkmap do not overlap */
- if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
- printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
- printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
- PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
- BUG();
- }
-#endif
-
- set_max_mapnr_init();
-
-#ifdef CONFIG_HIGHMEM
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
- printk("vmalloc area: %lx-%lx, maxmem %lx\n",
- VMALLOC_START,VMALLOC_END,MAXMEM);
- BUG_ON(VMALLOC_START > VMALLOC_END);
-
- /* this will put all low memory onto the freelists */
- totalram_pages += free_all_bootmem();
- /* XEN: init and count low-mem pages outside initial allocation. */
- for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
- ClearPageReserved(pfn_to_page(pfn));
- init_page_count(pfn_to_page(pfn));
- totalram_pages++;
- }
-
- reservedpages = 0;
- for (tmp = 0; tmp < max_low_pfn; tmp++)
- /*
- * Only count reserved RAM pages
- */
- if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
- reservedpages++;
-
- set_highmem_pages_init(bad_ppro);
-
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
-
- printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- num_physpages << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >> 10,
- initsize >> 10,
- (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
- );
-
-#ifdef CONFIG_X86_PAE
- if (!cpu_has_pae)
- panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
- if (boot_cpu_data.wp_works_ok < 0)
- test_wp_bit();
-
- /*
- * Subtle. SMP is doing it's boot stuff late (because it has to
- * fork idle threads) - but it also needs low mappings for the
- * protected-mode entry to work. We zap these entries only after
- * the WP-bit has been tested.
- */
-#ifndef CONFIG_SMP
- zap_low_mappings();
-#endif
-
- set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
-}
-
-/*
- * this is for the non-NUMA, single node SMP system case.
- * Specifically, in the case of x86, we will always add
- * memory to the highmem for now.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-int arch_add_memory(int nid, u64 start, u64 size)
-{
- struct pglist_data *pgdata = &contig_page_data;
- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
-
- return __add_pages(zone, start_pfn, nr_pages);
-}
-
-int remove_memory(u64 start, u64 size)
-{
- return -EINVAL;
-}
-#endif
-#endif
-
-kmem_cache_t *pgd_cache;
-kmem_cache_t *pmd_cache;
-
-void __init pgtable_cache_init(void)
-{
- if (PTRS_PER_PMD > 1) {
- pmd_cache = kmem_cache_create("pmd",
- PTRS_PER_PMD*sizeof(pmd_t),
- PTRS_PER_PMD*sizeof(pmd_t),
- 0,
- pmd_ctor,
- NULL);
- if (!pmd_cache)
- panic("pgtable_cache_init(): cannot create pmd cache");
- }
- pgd_cache = kmem_cache_create("pgd",
-#ifndef CONFIG_XEN
- PTRS_PER_PGD*sizeof(pgd_t),
- PTRS_PER_PGD*sizeof(pgd_t),
-#else
- PAGE_SIZE,
- PAGE_SIZE,
-#endif
- 0,
- pgd_ctor,
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
- if (!pgd_cache)
- panic("pgtable_cache_init(): Cannot create pgd cache");
-}
-
-/*
- * This function cannot be __init, since exceptions don't work in that
- * section. Put this after the callers, so that it cannot be inlined.
- */
-static int noinline do_test_wp_bit(void)
-{
- char tmp_reg;
- int flag;
-
- __asm__ __volatile__(
- " movb %0,%1 \n"
- "1: movb %1,%0 \n"
- " xorl %2,%2 \n"
- "2: \n"
- ".section __ex_table,\"a\"\n"
- " .align 4 \n"
- " .long 1b,2b \n"
- ".previous \n"
- :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
- "=q" (tmp_reg),
- "=r" (flag)
- :"2" (1)
- :"memory");
-
- return flag;
-}
-
-#ifdef CONFIG_DEBUG_RODATA
-
-void mark_rodata_ro(void)
-{
- unsigned long addr = (unsigned long)__start_rodata;
-
- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
- change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
-
- printk("Write protecting the kernel read-only data: %uk\n",
- (__end_rodata - __start_rodata) >> 10);
-
- /*
- * change_page_attr() requires a global_flush_tlb() call after it.
- * We do this after the printk so that if something went wrong in the
- * change, the printk gets out at least to give a better debug hint
- * of who is the culprit.
- */
- global_flush_tlb();
-}
-#endif
-
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr;
-
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
-}
-
-void free_initmem(void)
-{
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_init_pages("initrd memory", start, end);
-}
-#endif
-
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
deleted file mode 100644
index 6d6edd203a..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * arch/i386/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-
-#define ISA_START_ADDRESS 0x0
-#define ISA_END_ADDRESS 0x100000
-
-static int direct_remap_area_pte_fn(pte_t *pte,
- struct page *pmd_page,
- unsigned long address,
- void *data)
-{
- mmu_update_t **v = (mmu_update_t **)data;
-
- BUG_ON(!pte_none(*pte));
-
- (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) <<
- PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
- (*v)++;
-
- return 0;
-}
-
-static int __direct_remap_pfn_range(struct mm_struct *mm,
- unsigned long address,
- unsigned long mfn,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
-{
- int rc;
- unsigned long i, start_address;
- mmu_update_t *u, *v, *w;
-
- u = v = w = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
- if (u == NULL)
- return -ENOMEM;
-
- start_address = address;
-
- flush_cache_all();
-
- for (i = 0; i < size; i += PAGE_SIZE) {
- if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
- /* Flush a full batch after filling in the PTE ptrs. */
- rc = apply_to_page_range(mm, start_address,
- address - start_address,
- direct_remap_area_pte_fn, &w);
- if (rc)
- goto out;
- rc = -EFAULT;
- if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
- goto out;
- v = w = u;
- start_address = address;
- }
-
- /*
- * Fill in the machine address: PTE ptr is done later by
- * __direct_remap_area_pages().
- */
- v->val = __pte_val(pfn_pte_ma(mfn, prot));
-
- mfn++;
- address += PAGE_SIZE;
- v++;
- }
-
- if (v != u) {
- /* Final batch. */
- rc = apply_to_page_range(mm, start_address,
- address - start_address,
- direct_remap_area_pte_fn, &w);
- if (rc)
- goto out;
- rc = -EFAULT;
- if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
- goto out;
- }
-
- rc = 0;
-
- out:
- flush_tlb_all();
-
- free_page((unsigned long)u);
-
- return rc;
-}
-
-int direct_remap_pfn_range(struct vm_area_struct *vma,
- unsigned long address,
- unsigned long mfn,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
-{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return remap_pfn_range(vma, address, mfn, size, prot);
-
- if (domid == DOMID_SELF)
- return -EINVAL;
-
- vma->vm_flags |= VM_IO | VM_RESERVED;
-
- vma->vm_mm->context.has_foreign_mappings = 1;
-
- return __direct_remap_pfn_range(
- vma->vm_mm, address, mfn, size, prot, domid);
-}
-EXPORT_SYMBOL(direct_remap_pfn_range);
-
-int direct_kernel_remap_pfn_range(unsigned long address,
- unsigned long mfn,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
-{
- return __direct_remap_pfn_range(
- &init_mm, address, mfn, size, prot, domid);
-}
-EXPORT_SYMBOL(direct_kernel_remap_pfn_range);
-
-static int lookup_pte_fn(
- pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
-{
- uint64_t *ptep = (uint64_t *)data;
- if (ptep)
- *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) <<
- PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
- return 0;
-}
-
-int create_lookup_pte_addr(struct mm_struct *mm,
- unsigned long address,
- uint64_t *ptep)
-{
- return apply_to_page_range(mm, address, PAGE_SIZE,
- lookup_pte_fn, ptep);
-}
-
-EXPORT_SYMBOL(create_lookup_pte_addr);
-
-static int noop_fn(
- pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
-{
- return 0;
-}
-
-int touch_pte_range(struct mm_struct *mm,
- unsigned long address,
- unsigned long size)
-{
- return apply_to_page_range(mm, address, size, noop_fn, NULL);
-}
-
-EXPORT_SYMBOL(touch_pte_range);
-
-/*
- * Does @address reside within a non-highmem page that is local to this virtual
- * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
- * See the comment that accompanies mfn_to_local_pfn() in page.h to understand
- * why this works.
- */
-static inline int is_local_lowmem(unsigned long address)
-{
- extern unsigned long max_low_pfn;
- return (mfn_to_local_pfn(address >> PAGE_SHIFT) < max_low_pfn);
-}
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void __iomem * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- domid_t domid = DOMID_IO;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (is_initial_xendomain() &&
- phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return (void __iomem *) isa_bus_to_virt(phys_addr);
-
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (is_local_lowmem(phys_addr)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = bus_to_virt(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
-
- domid = DOMID_SELF;
- }
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- flags |= _KERNPG_TABLE;
- if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
- phys_addr>>PAGE_SHIFT,
- size, __pgprot(flags), domid)) {
- vunmap((void __force *) addr);
- return NULL;
- }
- return (void __iomem *) (offset + (char __iomem *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- unsigned long last_addr;
- void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
- if (!p)
- return p;
-
- /* Guaranteed to be > phys_addr, as per __ioremap() */
- last_addr = phys_addr + size - 1;
-
- if (is_local_lowmem(last_addr)) {
- struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
- unsigned long npages;
-
- phys_addr &= PAGE_MASK;
-
- /* This might overflow and become zero.. */
- last_addr = PAGE_ALIGN(last_addr);
-
- /* .. but that's ok, because modulo-2**n arithmetic will make
- * the page-aligned "last - first" come out right.
- */
- npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
- iounmap(p);
- p = NULL;
- }
- global_flush_tlb();
- }
-
- return p;
-}
-EXPORT_SYMBOL(ioremap_nocache);
-
-/**
- * iounmap - Free a IO remapping
- * @addr: virtual address from ioremap_*
- *
- * Caller must ensure there is only one unmapping for the same pointer.
- */
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p, *o;
-
- if ((void __force *)addr <= high_memory)
- return;
-
- /*
- * __ioremap special-cases the PCI/ISA range by not instantiating a
- * vm_area and by simply returning an address into the kernel mapping
- * of ISA space. So handle that here.
- */
- if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
- return;
-
- addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
-
- /* Use the vm area unlocked, assuming the caller
- ensures there isn't another iounmap for the same address
- in parallel. Reuse of the virtual address is prevented by
- leaving it in the global lists until we're done with it.
- cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
- break;
- }
- read_unlock(&vmlist_lock);
-
- if (!p) {
- printk("iounmap: bad address %p\n", addr);
- dump_stack();
- return;
- }
-
- /* Reset the direct mapping. Can block */
- if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
- /* p->size includes the guard page, but cpa doesn't like that */
- change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
- (p->size - PAGE_SIZE) >> PAGE_SHIFT,
- PAGE_KERNEL);
- global_flush_tlb();
- }
-
- /* Finally remove it */
- o = remove_vm_area((void *)addr);
- BUG_ON(p != o || o == NULL);
- kfree(p);
-}
-EXPORT_SYMBOL(iounmap);
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
- unsigned long offset, last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (is_initial_xendomain() &&
- phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return isa_bus_to_virt(phys_addr);
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS)
- return NULL;
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- set_fixmap(idx, phys_addr);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
- return;
- if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
- return;
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- clear_fixmap(idx);
- --idx;
- --nrpages;
- }
-}
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
deleted file mode 100644
index df20e591c1..0000000000
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
+++ /dev/null
@@ -1,727 +0,0 @@
-/*
- * linux/arch/i386/mm/pgtable.c
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-
-#include <asm/system.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/fixmap.h>
-#include <asm/e820.h>
-#include <asm/tlb.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-
-#include <xen/features.h>
-#include <asm/hypervisor.h>
-
-static void pgd_test_and_unpin(pgd_t *pgd);
-
-void show_mem(void)
-{
- int total = 0, reserved = 0;
- int shared = 0, cached = 0;
- int highmem = 0;
- struct page *page;
- pg_data_t *pgdat;
- unsigned long i;
- unsigned long flags;
-
- printk(KERN_INFO "Mem-info:\n");
- show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_online_pgdat(pgdat) {
- pgdat_resize_lock(pgdat, &flags);
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
- page = pgdat_page_nr(pgdat, i);
- total++;
- if (PageHighMem(page))
- highmem++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
- }
- pgdat_resize_unlock(pgdat, &flags);
- }
- printk(KERN_INFO "%d pages of RAM\n", total);
- printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
- printk(KERN_INFO "%d reserved pages\n", reserved);
- printk(KERN_INFO "%d pages shared\n", shared);
- printk(KERN_INFO "%d pages swap cached\n", cached);
-
- printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
- printk(KERN_INFO "%lu pages writeback\n",
- global_page_state(NR_WRITEBACK));
- printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
- printk(KERN_INFO "%lu pages pagetables\n",
- global_page_state(NR_PAGETABLE));
-}
-
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- BUG();
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- BUG();
- return;
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- BUG();
- return;
- }
- pte = pte_offset_kernel(pmd, vaddr);
- if (pgprot_val(flags))
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte(pfn, flags));
- else
- pte_clear(&init_mm, vaddr, pte);
-
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
-}
-
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn,
- pgprot_t flags)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- BUG();
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- BUG();
- return;
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- BUG();
- return;
- }
- pte = pte_offset_kernel(pmd, vaddr);
- if (pgprot_val(flags))
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte_ma(pfn, flags));
- else
- pte_clear(&init_mm, vaddr, pte);
-
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
-}
-
-/*
- * Associate a large virtual page frame with a given physical page frame
- * and protection flags for that frame. pfn is for the base of the page,
- * vaddr is what the page gets mapped to - both must be properly aligned.
- * The pmd must already be instantiated. Assumes PAE mode.
- */
-void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */
- printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
- return; /* BUG(); */
- }
- if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */
- printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
- return; /* BUG(); */
- }
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
- return; /* BUG(); */
- }
- pud = pud_offset(pgd, vaddr);
- pmd = pmd_offset(pud, vaddr);
- set_pmd(pmd, pfn_pmd(pfn, flags));
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
-}
-
-static int nr_fixmaps = 0;
-unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
-unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
-EXPORT_SYMBOL(__FIXADDR_TOP);
-
-void __init set_fixaddr_top(unsigned long top)
-{
- BUG_ON(nr_fixmaps > 0);
- hypervisor_virt_start = top;
- __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
-}
-
-void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
- }
- switch (idx) {
- case FIX_WP_TEST:
-#ifdef CONFIG_X86_F00F_BUG
- case FIX_F00F_IDT:
-#endif
- case FIX_VDSO:
- set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
- break;
- default:
- set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
- break;
- }
- nr_fixmaps++;
-}
-
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
- if (pte)
- make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
- return pte;
-}
-
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
- struct page *pte;
-
-#ifdef CONFIG_HIGHPTE
- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-#else
- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-#endif
- if (pte) {
- SetPageForeign(pte, pte_free);
- init_page_count(pte);
- }
- return pte;
-}
-
-void pte_free(struct page *pte)
-{
- unsigned long pfn = page_to_pfn(pte);
-
- if (!PageHighMem(pte)) {
- unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
-
- if (!pte_write(*virt_to_ptep(va)))
- if (HYPERVISOR_update_va_mapping(
- va, pfn_pte(pfn, PAGE_KERNEL), 0))
- BUG();
- } else
- clear_bit(PG_pinned, &pte->flags);
-
- ClearPageForeign(pte);
- init_page_count(pte);
-
- __free_page(pte);
-}
-
-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
-{
- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-}
-
-/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
- * against pageattr.c; it is the unique case in which a valid change
- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
- * vmalloc faults work because attached pagetables are never freed.
- * The locking scheme was chosen on the basis of manfred's
- * recommendations and having no core impact whatsoever.
- * -- wli
- */
-DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
-
-static inline void pgd_list_add(pgd_t *pgd)
-{
- struct page *page = virt_to_page(pgd);
- page->index = (unsigned long)pgd_list;
- if (pgd_list)
- set_page_private(pgd_list, (unsigned long)&page->index);
- pgd_list = page;
- set_page_private(page, (unsigned long)&pgd_list);
-}
-
-static inline void pgd_list_del(pgd_t *pgd)
-{
- struct page *next, **pprev, *page = virt_to_page(pgd);
- next = (struct page *)page->index;
- pprev = (struct page **)page_private(page);
- *pprev = next;
- if (next)
- set_page_private(next, (unsigned long)pprev);
-}
-
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
-{
- unsigned long flags;
-
- if (PTRS_PER_PMD > 1) {
- if (HAVE_SHARED_KERNEL_PMD)
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- } else {
- spin_lock_irqsave(&pgd_lock, flags);
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-}
-
-/* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
-{
- unsigned long flags; /* can be called from interrupt context */
-
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- pgd_test_and_unpin(pgd);
-}
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- int i;
- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
- pmd_t **pmd;
- unsigned long flags;
-
- pgd_test_and_unpin(pgd);
-
- if (PTRS_PER_PMD == 1 || !pgd)
- return pgd;
-
- if (HAVE_SHARED_KERNEL_PMD) {
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (!pmd)
- goto out_oom;
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- }
- return pgd;
- }
-
- /*
- * We can race save/restore (if we sleep during a GFP_KERNEL memory
- * allocation). We therefore store virtual addresses of pmds as they
- * do not change across save/restore, and poke the machine addresses
- * into the pgdir under the pgd_lock.
- */
- pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
- if (!pmd) {
- kmem_cache_free(pgd_cache, pgd);
- return NULL;
- }
-
- /* Allocate pmds, remember virtual addresses. */
- for (i = 0; i < PTRS_PER_PGD; ++i) {
- pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (!pmd[i])
- goto out_oom;
- }
-
- spin_lock_irqsave(&pgd_lock, flags);
-
- /* Protect against save/restore: move below 4GB under pgd_lock. */
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
- int rc = xen_create_contiguous_region(
- (unsigned long)pgd, 0, 32);
- if (rc) {
- spin_unlock_irqrestore(&pgd_lock, flags);
- goto out_oom;
- }
- }
-
- /* Copy kernel pmd contents and write-protect the new pmds. */
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
- unsigned long v = (unsigned long)i << PGDIR_SHIFT;
- pgd_t *kpgd = pgd_offset_k(v);
- pud_t *kpud = pud_offset(kpgd, v);
- pmd_t *kpmd = pmd_offset(kpud, v);
- memcpy(pmd[i], kpmd, PAGE_SIZE);
- make_lowmem_page_readonly(
- pmd[i], XENFEAT_writable_page_tables);
- }
-
- /* It is safe to poke machine addresses of pmds under the pmd_lock. */
- for (i = 0; i < PTRS_PER_PGD; i++)
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
-
- /* Ensure this pgd gets picked up and pinned on save/restore. */
- pgd_list_add(pgd);
-
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- kfree(pmd);
-
- return pgd;
-
-out_oom:
- if (HAVE_SHARED_KERNEL_PMD) {
- for (i--; i >= 0; i--)
- kmem_cache_free(pmd_cache,
- (void *)__va(pgd_val(pgd[i])-1));
- } else {
- for (i--; i >= 0; i--)
- kmem_cache_free(pmd_cache, pmd[i]);
- kfree(pmd);
- }
- kmem_cache_free(pgd_cache, pgd);
- return NULL;
-}
-
-void pgd_free(pgd_t *pgd)
-{
- int i;
-
- /*
- * After this the pgd should not be pinned for the duration of this
- * function's execution. We should never sleep and thus never race:
- * 1. User pmds will not become write-protected under our feet due
- * to a concurrent mm_pin_all().
- * 2. The machine addresses in PGD entries will not become invalid
- * due to a concurrent save/restore.
- */
- pgd_test_and_unpin(pgd);
-
- /* in the PAE case user pgd entries are overwritten before usage */
- if (PTRS_PER_PMD > 1) {
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
- kmem_cache_free(pmd_cache, pmd);
- }
-
- if (!HAVE_SHARED_KERNEL_PMD) {
- unsigned long flags;
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
- make_lowmem_page_writable(
- pmd, XENFEAT_writable_page_tables);
- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
- kmem_cache_free(pmd_cache, pmd);
- }
-
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
- xen_destroy_contiguous_region(
- (unsigned long)pgd, 0);
- }
- }
-
- /* in the non-PAE case, free_pgtables() clears user pgd entries */
- kmem_cache_free(pgd_cache, pgd);
-}
-
-void make_lowmem_page_readonly(void *va, unsigned int feature)
-{
- pte_t *pte;
- int rc;
-
- if (xen_feature(feature))
- return;
-
- pte = virt_to_ptep(va);
- rc = HYPERVISOR_update_va_mapping(
- (unsigned long)va, pte_wrprotect(*pte), 0);
- BUG_ON(rc);
-}
-
-void make_lowmem_page_writable(void *va, unsigned int feature)
-{
- pte_t *pte;
- int rc;
-
- if (xen_feature(feature))
- return;
-
- pte = virt_to_ptep(va);
- rc = HYPERVISOR_update_va_mapping(
- (unsigned long)va, pte_mkwrite(*pte), 0);
- BUG_ON(rc);
-}
-
-void make_page_readonly(void *va, unsigned int feature)
-{
- pte_t *pte;
- int rc;
-
- if (xen_feature(feature))
- return;
-
- pte = virt_to_ptep(va);
- rc = HYPERVISOR_update_va_mapping(
- (unsigned long)va, pte_wrprotect(*pte), 0);
- if (rc) /* fallback? */
- xen_l1_entry_update(pte, pte_wrprotect(*pte));
- if ((unsigned long)va >= (unsigned long)high_memory) {
- unsigned long pfn = pte_pfn(*pte);
-#ifdef CONFIG_HIGHMEM
- if (pfn >= highstart_pfn)
- kmap_flush_unused(); /* flush stale writable kmaps */
- else
-#endif
- make_lowmem_page_readonly(
- phys_to_virt(pfn << PAGE_SHIFT), feature);
- }
-}
-
-void make_page_writable(void *va, unsigned int feature)
-{
- pte_t *pte;
- int rc;
-
- if (xen_feature(feature))
- return;
-
- pte = virt_to_ptep(va);
- rc = HYPERVISOR_update_va_mapping(
- (unsigned long)va, pte_mkwrite(*pte), 0);
- if (rc) /* fallback? */
- xen_l1_entry_update(pte, pte_mkwrite(*pte));
- if ((unsigned long)va >= (unsigned long)high_memory) {
- unsigned long pfn = pte_pfn(*pte);
-#ifdef CONFIG_HIGHMEM
- if (pfn < highstart_pfn)
-#endif
- make_lowmem_page_writable(
- phys_to_virt(pfn << PAGE_SHIFT), feature);
- }
-}
-
-void make_pages_readonly(void *va, unsigned int nr, unsigned int feature)
-{
- if (xen_feature(feature))
- return;
-
- while (nr-- != 0) {
- make_page_readonly(va, feature);
- va = (void *)((unsigned long)va + PAGE_SIZE);
- }
-}
-
-void make_pages_writable(void *va, unsigned int nr, unsigned int feature)
-{
- if (xen_feature(feature))
- return;
-
- while (nr-- != 0) {
- make_page_writable(va, feature);
- va = (void *)((unsigned long)va + PAGE_SIZE);
- }
-}
-
-static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
-{
- unsigned long pfn = page_to_pfn(page);
- int rc;
-
- if (PageHighMem(page)) {
- if (pgprot_val(flags) & _PAGE_RW)
- clear_bit(PG_pinned, &page->flags);
- else
- set_bit(PG_pinned, &page->flags);
- } else {
- rc = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- pfn_pte(pfn, flags), 0);
- if (rc)
- BUG();
- }
-}
-
-static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
-{
- pgd_t *pgd = pgd_base;
- pud_t *pud;
- pmd_t *pmd;
- int g, u, m, rc;
-
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return;
-
- for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
- if (pgd_none(*pgd))
- continue;
- pud = pud_offset(pgd, 0);
- if (PTRS_PER_PUD > 1) /* not folded */
- pgd_walk_set_prot(virt_to_page(pud),flags);
- for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
- if (pud_none(*pud))
- continue;
- pmd = pmd_offset(pud, 0);
- if (PTRS_PER_PMD > 1) /* not folded */
- pgd_walk_set_prot(virt_to_page(pmd),flags);
- for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
- if (pmd_none(*pmd))
- continue;
- pgd_walk_set_prot(pmd_page(*pmd),flags);
- }
- }
- }
-
- rc = HYPERVISOR_update_va_mapping(
- (unsigned long)pgd_base,
- pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
- UVMF_TLB_FLUSH);
- if (rc)
- BUG();
-}
-
-static void __pgd_pin(pgd_t *pgd)
-{
- pgd_walk(pgd, PAGE_KERNEL_RO);
- kmap_flush_unused();
- xen_pgd_pin(__pa(pgd));
- set_bit(PG_pinned, &virt_to_page(pgd)->flags);
-}
-
-static void __pgd_unpin(pgd_t *pgd)
-{
- xen_pgd_unpin(__pa(pgd));
- pgd_walk(pgd, PAGE_KERNEL);
- clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
-}
-
-static void pgd_test_and_unpin(pgd_t *pgd)
-{
- if (test_bit(PG_pinned, &virt_to_page(pgd)->flags))
- __pgd_unpin(pgd);
-}
-
-void mm_pin(struct mm_struct *mm)
-{
- if (xen_feature(XENFEAT_writable_page_tables))
- return;
- spin_lock(&mm->page_table_lock);
- __pgd_pin(mm->pgd);
- spin_unlock(&mm->page_table_lock);
-}
-
-void mm_unpin(struct mm_struct *mm)
-{
- if (xen_feature(XENFEAT_writable_page_tables))
- return;
- spin_lock(&mm->page_table_lock);
- __pgd_unpin(mm->pgd);
- spin_unlock(&mm->page_table_lock);
-}
-
-void mm_pin_all(void)
-{
- struct page *page;
- unsigned long flags;
-
- if (xen_feature(XENFEAT_writable_page_tables))
- return;
-
- /*
- * Allow uninterrupted access to the pgd_list. Also protects
- * __pgd_pin() by disabling preemption.
- * All other CPUs must be at a safe point (e.g., in stop_machine
- * or offlined entirely).
- */
- spin_lock_irqsave(&pgd_lock, flags);
- for (page = pgd_list; page; page = (struct page *)page->index) {
- if (!test_bit(PG_pinned, &page->flags))
- __pgd_pin((pgd_t *)page_address(page));
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-void _arch_dup_mmap(struct mm_struct *mm)
-{
- if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
- mm_pin(mm);
-}
-
-void _arch_exit_mmap(struct mm_struct *mm)
-{
- struct task_struct *tsk = current;
-
- task_lock(tsk);
-
- /*
- * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
- * *much* faster this way, as no tlb flushes means bigger wrpt batches.
- */
- if (tsk->active_mm == mm) {
- tsk->active_mm = &init_mm;
- atomic_inc(&init_mm.mm_count);
-
- switch_mm(mm, &init_mm, tsk);
-
- atomic_dec(&mm->mm_count);
- BUG_ON(atomic_read(&mm->mm_count) == 0);
- }
-
- task_unlock(tsk);
-
- if (test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags) &&
- (atomic_read(&mm->mm_count) == 1) &&
- !mm->context.has_foreign_mappings)
- mm_unpin(mm);
-}