about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> 2006-03-24 12:14:58 +0100
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> 2006-03-24 12:14:58 +0100
commit 1a8bbf93b8f9d10585d3d796fa6dda5eefae15b6 (patch)
tree 400700e1c8c57582e8c868633dafbeac5f016302
parent 7e273af3ec8363a4d961119d0f055ca8d78ba31f (diff)
download xen-1a8bbf93b8f9d10585d3d796fa6dda5eefae15b6.tar.gz
xen-1a8bbf93b8f9d10585d3d796fa6dda5eefae15b6.tar.bz2
xen-1a8bbf93b8f9d10585d3d796fa6dda5eefae15b6.zip
Detect spurious faults taken in the hypervisor that are
due to writable pagetable logic.

Signed-off-by: Keir Fraser <keir@xensource.com>
-rw-r--r-- xen/arch/x86/mm.c 5
-rw-r--r-- xen/arch/x86/traps.c 47
-rw-r--r-- xen/arch/x86/x86_32/traps.c 85
-rw-r--r-- xen/arch/x86/x86_64/traps.c 82
-rw-r--r-- xen/include/asm-x86/processor.h 1
5 files changed, 178 insertions, 42 deletions
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 54b1a52184..4977a6d2da 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3351,8 +3351,9 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
* permissions in page directories by writing back to the linear mapping.
*/
if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
- return !__put_user(
- pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1);
+ return __put_user(
+ pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
+ 0 : EXCRET_not_a_fault;
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 17e1e80072..653786e9e3 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -620,6 +620,46 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
return 0;
}
+static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ int rc;
+
+ /*
+ * The only possible reason for a spurious page fault not to be picked
+ * up already is that a page directory was unhooked by writable page table
+ * logic and then reattached before the faulting VCPU could detect it.
+ */
+ if ( is_idle_domain(d) || /* no ptwr in idle domain */
+ IN_HYPERVISOR_RANGE(addr) || /* no ptwr on hypervisor addrs */
+ shadow_mode_enabled(d) || /* no ptwr logic in shadow mode */
+ ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault? */
+ return 0;
+
+ LOCK_BIGLOCK(d);
+
+ /*
+ * The page directory could have been detached again while we weren't
+ * holding the per-domain lock. Detect that and fix up if it's the case.
+ */
+ if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
+ unlikely(l2_linear_offset(addr) ==
+ d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
+ {
+ ptwr_flush(d, PTWR_PT_ACTIVE);
+ rc = 1;
+ }
+ else
+ {
+ /* Okay, walk the page tables. Only check for not-present faults.*/
+ rc = __spurious_page_fault(addr);
+ }
+
+ UNLOCK_BIGLOCK(d);
+ return rc;
+}
+
/*
* #PF error code:
* Bit 0: Protection violation (=1) ; Page not present (=0)
@@ -644,6 +684,13 @@ asmlinkage int do_page_fault(struct cpu_user_regs *regs)
if ( unlikely(!guest_mode(regs)) )
{
+ if ( spurious_page_fault(addr, regs) )
+ {
+ DPRINTK("Spurious fault in domain %u:%u at addr %lx\n",
+ current->domain->domain_id, current->vcpu_id, addr);
+ return EXCRET_not_a_fault;
+ }
+
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
{
perfc_incrc(copy_user_faults);
diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c
index 1c570dad29..1c5daca90d 100644
--- a/xen/arch/x86/x86_32/traps.c
+++ b/xen/arch/x86/x86_32/traps.c
@@ -70,38 +70,77 @@ void show_registers(struct cpu_user_regs *regs)
void show_page_walk(unsigned long addr)
{
- unsigned long mfn = read_cr3() >> PAGE_SHIFT;
- intpte_t *ptab, ent;
- unsigned long pfn;
+ unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+#ifdef CONFIG_X86_PAE
+ l3_pgentry_t l3e, *l3t;
+#endif
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
printk("Pagetable walk from %08lx:\n", addr);
#ifdef CONFIG_X86_PAE
- ptab = map_domain_page(mfn);
- ent = ptab[l3_table_offset(addr)];
- pfn = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
- printk(" L3 = %"PRIpte" %08lx\n", ent, pfn);
- unmap_domain_page(ptab);
- if ( !(ent & _PAGE_PRESENT) )
+ l3t = map_domain_page(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
+ unmap_domain_page(l3t);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return;
- mfn = ent >> PAGE_SHIFT;
#endif
- ptab = map_domain_page(mfn);
- ent = ptab[l2_table_offset(addr)];
- pfn = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
- printk(" L2 = %"PRIpte" %08lx %s\n", ent, pfn,
- (ent & _PAGE_PSE) ? "(PSE)" : "");
- unmap_domain_page(ptab);
- if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
+ l2t = map_domain_page(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn,
+ (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
+ unmap_domain_page(l2t);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
return;
- mfn = ent >> PAGE_SHIFT;
- ptab = map_domain_page(ent >> PAGE_SHIFT);
- ent = ptab[l1_table_offset(addr)];
- pfn = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
- printk(" L1 = %"PRIpte" %08lx\n", ent, pfn);
- unmap_domain_page(ptab);
+ l1t = map_domain_page(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
+ unmap_domain_page(l1t);
+}
+
+int __spurious_page_fault(unsigned long addr)
+{
+ unsigned long mfn = read_cr3() >> PAGE_SHIFT;
+#ifdef CONFIG_X86_PAE
+ l3_pgentry_t l3e, *l3t;
+#endif
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
+#ifdef CONFIG_X86_PAE
+ l3t = map_domain_page(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ unmap_domain_page(l3t);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return 0;
+#endif
+
+ l2t = map_domain_page(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ unmap_domain_page(l2t);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ return 0;
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ return 1;
+
+ l1t = map_domain_page(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ unmap_domain_page(l1t);
+ return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
}
#define DOUBLEFAULT_STACK_SIZE 1024
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index e78c8f9453..46835570c4 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -70,31 +70,79 @@ void show_registers(struct cpu_user_regs *regs)
void show_page_walk(unsigned long addr)
{
- unsigned long page = read_cr3();
-
+ unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+ l4_pgentry_t l4e, *l4t;
+ l3_pgentry_t l3e, *l3t;
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
printk("Pagetable walk from %016lx:\n", addr);
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l4_table_offset(addr)];
- printk(" L4 = %016lx\n", page);
- if ( !(page & _PAGE_PRESENT) )
+ l4t = mfn_to_virt(mfn);
+ l4e = l4t[l4_table_offset(addr)];
+ mfn = l4e_get_pfn(l4e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L4 = %"PRIpte" %016lx\n", l4e_get_intpte(l4e), pfn);
+ if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l3_table_offset(addr)];
- printk(" L3 = %016lx\n", page);
- if ( !(page & _PAGE_PRESENT) )
+ l3t = mfn_to_virt(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L3 = %"PRIpte" %016lx\n", l3e_get_intpte(l3e), pfn);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l2_table_offset(addr)];
- printk(" L2 = %016lx %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : "");
- if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+ l2t = mfn_to_virt(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L2 = %"PRIpte" %016lx %s\n", l2e_get_intpte(l2e), pfn,
+ (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
- printk(" L1 = %016lx\n", page);
+ l1t = mfn_to_virt(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
+}
+
+int __spurious_page_fault(unsigned long addr)
+{
+ unsigned long mfn = read_cr3() >> PAGE_SHIFT;
+ l4_pgentry_t l4e, *l4t;
+ l3_pgentry_t l3e, *l3t;
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
+ l4t = mfn_to_virt(mfn);
+ l4e = l4t[l4_table_offset(addr)];
+ mfn = l4e_get_pfn(l4e);
+ if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+ return 0;
+
+ l3t = mfn_to_virt(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return 0;
+
+ l2t = mfn_to_virt(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ return 0;
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ return 1;
+
+ l1t = mfn_to_virt(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
}
asmlinkage void double_fault(void);
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index de4232f5bd..2cda6c15f5 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -524,6 +524,7 @@ extern always_inline void prefetchw(const void *x)
void show_stack(struct cpu_user_regs *regs);
void show_registers(struct cpu_user_regs *regs);
void show_page_walk(unsigned long addr);
+int __spurious_page_fault(unsigned long addr);
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
extern void mtrr_ap_init(void);