aboutsummaryrefslogtreecommitdiffstats
path: root/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
diff options
context:
space:
mode:
author: Christian Limpach <Christian.Limpach@xensource.com> 2007-01-12 14:30:49 +0000
committer: Christian Limpach <Christian.Limpach@xensource.com> 2007-01-12 14:30:49 +0000
commit: 432e7b62cb60b6f6f45e52b3db0268560d9b7cdb (patch)
tree: 5933250c7c05b423abff5ab16f19b70832820dbb /linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
parent: a25c73ef380731b0ab4347b854e81827cb6a6131 (diff)
download: xen-432e7b62cb60b6f6f45e52b3db0268560d9b7cdb.tar.gz
xen-432e7b62cb60b6f6f45e52b3db0268560d9b7cdb.tar.bz2
xen-432e7b62cb60b6f6f45e52b3db0268560d9b7cdb.zip
[linux] Update to linux-2.6.17.
Signed-off-by: Christian Limpach <Christian.Limpach@xensource.com> --HG-- rename : patches/linux-2.6.16.33/blktap-aio-16_03_06.patch => patches/linux-2.6.17/blktap-aio-16_03_06.patch rename : patches/linux-2.6.16.33/fix-hz-suspend.patch => patches/linux-2.6.17/fix-hz-suspend.patch rename : patches/linux-2.6.16.33/fix-ide-cd-pio-mode.patch => patches/linux-2.6.17/fix-ide-cd-pio-mode.patch rename : patches/linux-2.6.16.33/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch => patches/linux-2.6.17/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch rename : patches/linux-2.6.16.33/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch => patches/linux-2.6.17/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch rename : patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch => patches/linux-2.6.17/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch rename : patches/linux-2.6.16.33/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch => patches/linux-2.6.17/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch rename : patches/linux-2.6.16.33/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch => patches/linux-2.6.17/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch rename : patches/linux-2.6.16.33/i386-mach-io-check-nmi.patch => patches/linux-2.6.17/i386-mach-io-check-nmi.patch rename : patches/linux-2.6.16.33/ipv6-no-autoconf.patch => patches/linux-2.6.17/ipv6-no-autoconf.patch rename : patches/linux-2.6.16.33/kasprintf.patch => patches/linux-2.6.17/kasprintf.patch rename : patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch => patches/linux-2.6.17/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch rename : patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch => patches/linux-2.6.17/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch rename : patches/linux-2.6.16.33/net-csum.patch => patches/linux-2.6.17/net-csum.patch rename : patches/linux-2.6.16.33/net-gso-0-base.patch => 
patches/linux-2.6.17/net-gso-0-base.patch rename : patches/linux-2.6.16.33/net-gso-1-check-dodgy.patch => patches/linux-2.6.17/net-gso-1-check-dodgy.patch rename : patches/linux-2.6.16.33/net-gso-2-checksum-fix.patch => patches/linux-2.6.17/net-gso-2-checksum-fix.patch rename : patches/linux-2.6.16.33/net-gso-3-fix-errorcheck.patch => patches/linux-2.6.17/net-gso-3-fix-errorcheck.patch rename : patches/linux-2.6.16.33/net-gso-4-kill-warnon.patch => patches/linux-2.6.17/net-gso-4-kill-warnon.patch rename : patches/linux-2.6.16.33/net-gso-5-rcv-mss.patch => patches/linux-2.6.17/net-gso-5-rcv-mss.patch rename : patches/linux-2.6.16.33/net-gso-6-linear-segmentation.patch => patches/linux-2.6.17/net-gso-6-linear-segmentation.patch rename : patches/linux-2.6.16.33/pmd-shared.patch => patches/linux-2.6.17/pmd-shared.patch rename : patches/linux-2.6.16.33/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch => patches/linux-2.6.17/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch rename : patches/linux-2.6.16.33/series => patches/linux-2.6.17/series rename : patches/linux-2.6.16.33/vsnprintf.patch => patches/linux-2.6.17/vsnprintf.patch rename : patches/linux-2.6.16.33/x86-elfnote-as-preprocessor-macro.patch => patches/linux-2.6.17/x86-elfnote-as-preprocessor-macro.patch rename : patches/linux-2.6.16.33/x86-increase-interrupt-vector-range.patch => patches/linux-2.6.17/x86-increase-interrupt-vector-range.patch rename : patches/linux-2.6.16.33/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch => patches/linux-2.6.17/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch rename : patches/linux-2.6.16.33/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch => patches/linux-2.6.17/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch rename : patches/linux-2.6.16.33/xen-hotplug.patch => patches/linux-2.6.17/xen-hotplug.patch rename : patches/linux-2.6.16.33/xenoprof-generic.patch => patches/linux-2.6.17/xenoprof-generic.patch
Diffstat (limited to 'linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c')
-rw-r--r-- linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c 226
1 files changed, 146 insertions, 80 deletions
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
index 4939ab106e..7b620769f5 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
@@ -252,8 +252,9 @@ static void dump_fault_path(unsigned long address)
page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
- machine_to_phys(page));
+ if (oops_may_print())
+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
+ machine_to_phys(page));
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
@@ -261,7 +262,7 @@ static void dump_fault_path(unsigned long address)
* it's allocated already.
*/
#ifndef CONFIG_HIGHPTE
- if (page & 1) {
+ if ((page & 1) && oops_may_print()) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = machine_to_phys(page);
@@ -311,6 +312,76 @@ static int spurious_fault(struct pt_regs *regs,
return 1;
}
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ /*
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
+ * and redundant with the set_pmd() on non-PAE. As would
+ * set_pud.
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+ if (!pmd_present(*pmd))
+#ifndef CONFIG_XEN
+ set_pmd(pmd, *pmd_k);
+#else
+ /*
+ * When running on Xen we must launder *pmd_k through
+ * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
+ */
+ set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
+#endif
+ else
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
+ */
+static inline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+ return 0;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -320,6 +391,8 @@ static int spurious_fault(struct pt_regs *regs,
* bit 0 == 0 means no page found, 1 means protection fault
* bit 1 == 0 means read, 1 means write
* bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means use of reserved bit detected
+ * bit 4 == 1 means fault was an instruction fetch
*/
fastcall void __kprobes do_page_fault(struct pt_regs *regs,
unsigned long error_code)
@@ -339,13 +412,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
if (regs->eflags & X86_EFLAGS_VM)
error_code |= 4;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
- /* It's safe to allow irq's after cr2 has been saved */
- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
- local_irq_enable();
-
tsk = current;
si_code = SEGV_MAPERR;
@@ -361,25 +427,37 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
*
* This verifies that the fault happens in kernel space
* (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 1) == 0.
+ * protection error (error_code & 9) == 0.
*/
- if (unlikely(address >= TASK_SIZE)) {
+ if (unlikely(address >= TASK_SIZE)) {
#ifdef CONFIG_XEN
/* Faults in hypervisor area can never be patched up. */
if (address >= hypervisor_virt_start)
goto bad_area_nosemaphore;
#endif
- if (!(error_code & 5))
- goto vmalloc_fault;
+ if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+ return;
/* Can take a spurious fault if mapping changes R/O -> R/W. */
if (spurious_fault(regs, address, error_code))
return;
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
*/
goto bad_area_nosemaphore;
- }
+ }
+
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+ fault has been handled. */
+ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+ local_irq_enable();
mm = tsk->mm;
@@ -550,21 +628,27 @@ no_context:
bust_spinlocks(1);
-#ifdef CONFIG_X86_PAE
- if (error_code & 16) {
- pte_t *pte = lookup_address(address);
+ if (oops_may_print()) {
+ #ifdef CONFIG_X86_PAE
+ if (error_code & 16) {
+ pte_t *pte = lookup_address(address);
- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
- printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+ if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+ printk(KERN_CRIT "kernel tried to execute "
+ "NX-protected page - exploit attempt? "
+ "(uid: %d)\n", current->uid);
+ }
+ #endif
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+ "pointer dereference");
+ else
+ printk(KERN_ALERT "BUG: unable to handle kernel paging"
+ " request");
+ printk(" at virtual address %08lx\n",address);
+ printk(KERN_ALERT " printing eip:\n");
+ printk("%08lx\n", regs->eip);
}
-#endif
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at virtual address %08lx\n",address);
- printk(KERN_ALERT " printing eip:\n");
- printk("%08lx\n", regs->eip);
dump_fault_path(address);
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
@@ -604,59 +688,41 @@ do_sigbus:
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
- return;
-
-vmalloc_fault:
- {
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- int index = pgd_index(address);
- unsigned long pgd_paddr;
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
-
- pgd_paddr = read_cr3();
- pgd = index + (pgd_t *)__va(pgd_paddr);
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto no_context;
-
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
+}
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- goto no_context;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- goto no_context;
-#ifndef CONFIG_XEN
- set_pmd(pmd, *pmd_k);
-#else
- /*
- * When running on Xen we must launder *pmd_k through
- * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
- */
- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
-#endif
+#ifndef CONFIG_X86_PAE
+void vmalloc_sync_all(void)
+{
+ /*
+ * Note that races in the updates of insync and start aren't
+ * problematic: insync can only get set bits added, and updates to
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = TASK_SIZE;
+ unsigned long address;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- goto no_context;
- return;
+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+ for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ for (page = pgd_list; page; page =
+ (struct page *)page->index)
+ if (!vmalloc_sync_one(page_address(page),
+ address)) {
+ BUG_ON(page != pgd_list);
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ if (!page)
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start && test_bit(pgd_index(address), insync))
+ start = address + PGDIR_SIZE;
}
}
+#endif