author		Keir Fraser <keir@xen.org>	2010-10-18 11:30:10 +0100
committer	Keir Fraser <keir@xen.org>	2010-10-18 11:30:10 +0100
commit		51ce0a98c8531bb33324f6f1f8b72a9d761775ed (patch)
tree		e276fe081f46e67a422d6466adceb695a2fc3efb
parent		92d4a8cf459ea72bf0c8300dc6f3a897f0d62633 (diff)
x86 guest pagetable walker: check for invalid bits in pagetable entries.

Some bits are reserved in x86 pagetable entries and must be zero; the
MMU should raise a pagefault if it sees them, with bit 3 set in the
error code. Xen's software pagetable walker hasn't been doing this,
which has been OK because no guest OSes actually have invalid bits set
except in error cases where things are already very wrong.

Xen's shadow pagetable code deliberately sets these bits as part of the
not-present-entry fast path, so if we're to support shadow-on-shadow
nested HVM, we need to start checking them.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
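For reference, a minimal userspace sketch of the check this patch
introduces. The names reserved_mask() and PFEC_RSVD are illustrative,
not Xen's; the real walker folds the mask into its flags accumulation,
as shown in the guest_walk.c hunks below.

#include <stdint.h>
#include <stdio.h>

#define PFEC_RSVD (1u << 3)   /* #PF error code bit 3: reserved bit set */

static uint64_t reserved_mask(unsigned int paddr_bits)
{
    /* In a 4-level entry, address bits [paddr_bits, 51] must be zero;
     * bits 52-62 are software-available and bit 63 is NX. */
    return ((1ULL << 52) - 1) & ~((1ULL << paddr_bits) - 1);
}

int main(void)
{
    uint64_t pte = (1ULL << 45) | 0x1;   /* present, but bit 45 set */
    uint32_t pfec = 0;

    if ( pte & reserved_mask(40) )       /* assume a 40-bit machine */
        pfec |= PFEC_RSVD;
    printf("error code: %#x\n", pfec);   /* prints 0x8 */
    return 0;
}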
-rw-r--r--	xen/arch/x86/cpu/mtrr/main.c	9
-rw-r--r--	xen/arch/x86/mm/guest_walk.c	20
-rw-r--r--	xen/arch/x86/mm/hap/guest_walk.c	3
-rw-r--r--	xen/arch/x86/mm/shadow/multi.c	9
-rw-r--r--	xen/include/asm-x86/guest_pt.h	11
-rw-r--r--	xen/include/asm-x86/processor.h	3
6 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c
index cf7570925e..671b9966a9 100644
--- a/xen/arch/x86/cpu/mtrr/main.c
+++ b/xen/arch/x86/cpu/mtrr/main.c
@@ -600,6 +600,8 @@ struct mtrr_value {
unsigned long lsize;
};
+unsigned int paddr_bits __read_mostly = 36;
+
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -620,17 +622,16 @@ void __init mtrr_bp_init(void)
Intel will implement it too when they extend the address
bus of the Xeon. */
if (cpuid_eax(0x80000000) >= 0x80000008) {
- u32 phys_addr;
- phys_addr = cpuid_eax(0x80000008) & 0xff;
+ paddr_bits = cpuid_eax(0x80000008) & 0xff;
/* CPUID workaround for Intel 0F33/0F34 CPU */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
(boot_cpu_data.x86_mask == 0x3 ||
boot_cpu_data.x86_mask == 0x4))
- phys_addr = 36;
+ paddr_bits = 36;
- size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+ size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
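The paddr_bits value above comes straight from CPUID leaf 0x80000008,
EAX[7:0]. A userspace sketch of the same query using GCC/clang's
<cpuid.h> helper rather than Xen's cpuid_eax():

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    unsigned int paddr_bits = 36;        /* default, as in the patch */

    /* __get_cpuid() returns 0 if the leaf is out of range, which covers
     * the cpuid_eax(0x80000000) >= 0x80000008 test in the hunk above. */
    if ( __get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx) )
        paddr_bits = eax & 0xff;         /* EAX[7:0] = physical bits */

    printf("physical address width: %u bits\n", paddr_bits);
    return 0;
}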
diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c
index 576333065b..0551579e75 100644
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -63,7 +63,7 @@ static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
&& !(pfec & PFEC_user_mode) )
pfec &= ~PFEC_write_access;
- return flags[(pfec & 0x1f) >> 1];
+ return flags[(pfec & 0x1f) >> 1] | _PAGE_INVALID_BITS;
}
/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
@@ -131,17 +131,19 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
guest_l3e_t *l3p = NULL;
guest_l4e_t *l4p;
#endif
- uint32_t gflags, mflags, rc = 0;
+ uint32_t gflags, mflags, iflags, rc = 0;
int pse;
perfc_incr(guest_walk);
memset(gw, 0, sizeof(*gw));
gw->va = va;
- /* Mandatory bits that must be set in every entry. We invert NX, to
- * calculate as if there were an "X" bit that allowed access.
- * We will accumulate, in rc, the set of flags that are missing. */
+ /* Mandatory bits that must be set in every entry. We invert NX and
+ * the invalid bits, to calculate as if there were an "X" bit that
+ * allowed access. We will accumulate, in rc, the set of flags that
+ * are missing/unwanted. */
mflags = mandatory_flags(v, pfec);
+ iflags = (_PAGE_NX_BIT | _PAGE_INVALID_BITS);
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
@@ -150,7 +152,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
gw->l4mfn = top_mfn;
l4p = (guest_l4e_t *) top_map;
gw->l4e = l4p[guest_l4_table_offset(va)];
- gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
+ gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT ) goto out;
@@ -164,7 +166,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
goto out;
/* Get the l3e and check its flags */
gw->l3e = l3p[guest_l3_table_offset(va)];
- gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+ gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT )
goto out;
@@ -201,7 +203,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
#endif /* All levels... */
- gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
+ gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT )
goto out;
@@ -246,7 +248,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
if(l1p == NULL)
goto out;
gw->l1e = l1p[guest_l1_table_offset(va)];
- gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+ gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
}
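The XOR against iflags is what lets one expression catch both kinds of
failure: by inverting NX and the invalid bits in the entry's flags,
"NX clear" and "no reserved bits set" look like ordinary must-be-set
permission bits, and (gflags & mflags) ^ mflags yields exactly the set
of missing/unwanted flags. A small self-contained demonstration, with
made-up flag values standing in for Xen's _PAGE_* constants:

#include <stdint.h>
#include <assert.h>

#define F_PRESENT  0x01
#define F_RW       0x02
#define F_NX       0x10   /* set in the entry means "not executable" */
#define F_INVALID  0x20   /* any reserved bit set in the entry */

int main(void)
{
    /* mflags: bits that must effectively be set; including F_NX and
     * F_INVALID here, and XOR-inverting them in the entry's flags,
     * makes "must be clear" bits behave like required bits. */
    uint32_t mflags = F_PRESENT | F_RW | F_NX | F_INVALID;
    uint32_t iflags = F_NX | F_INVALID;

    /* Present and writable, NX clear, no invalid bits: */
    uint32_t entry  = F_PRESENT | F_RW;
    uint32_t gflags = entry ^ iflags;            /* invert NX/invalid */
    uint32_t rc     = (gflags & mflags) ^ mflags;
    assert(rc == 0);                             /* nothing missing */

    /* Same entry with a reserved bit set: */
    entry |= F_INVALID;
    gflags = entry ^ iflags;
    rc     = (gflags & mflags) ^ mflags;
    assert(rc == F_INVALID);                     /* flagged as unwanted */
    return 0;
}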
diff --git a/xen/arch/x86/mm/hap/guest_walk.c b/xen/arch/x86/mm/hap/guest_walk.c
index 82a6255e44..2fbd971438 100644
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -99,6 +99,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
if ( missing & _PAGE_PRESENT )
pfec[0] &= ~PFEC_page_present;
+ if ( missing & _PAGE_INVALID_BITS )
+ pfec[0] |= PFEC_reserved_bit;
+
if ( missing & _PAGE_PAGED )
pfec[0] = PFEC_page_paged;
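The hunk above translates the walker's missing-flags result into the
architectural #PF error code: bit 0 (present) is cleared when the walk
failed on a not-present entry, and bit 3 (reserved) is set when it
failed on invalid bits. A sketch of that mapping with stand-in
constants in place of Xen's _PAGE_* and PFEC_* definitions:

#include <stdint.h>

#define PFEC_PRESENT (1u << 0)   /* architectural #PF error code bit 0 */
#define PFEC_RSVD    (1u << 3)   /* architectural #PF error code bit 3 */

#define F_PRESENT    0x01        /* stand-in for _PAGE_PRESENT */
#define F_INVALID    0x20        /* stand-in for _PAGE_INVALID_BITS */

static uint32_t fixup_pfec(uint32_t pfec, uint32_t missing)
{
    if ( missing & F_PRESENT )
        pfec &= ~PFEC_PRESENT;   /* walk failed on a not-present entry */
    if ( missing & F_INVALID )
        pfec |= PFEC_RSVD;       /* walk failed on reserved bits */
    return pfec;
}

int main(void)
{
    /* Reserved bits on an otherwise-present mapping: P stays, RSVD set. */
    return fixup_pfec(PFEC_PRESENT, F_INVALID)
           == (PFEC_PRESENT | PFEC_RSVD) ? 0 : 1;
}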
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 09771bfc03..376cff008a 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3181,6 +3181,8 @@ static int sh_page_fault(struct vcpu *v,
perfc_incr(shadow_fault_bail_real_fault);
SHADOW_PRINTK("not a shadow fault\n");
reset_early_unshadow(v);
+ if ( (rc & _PAGE_INVALID_BITS) )
+ regs->error_code |= PFEC_reserved_bit;
goto propagate;
}
@@ -3772,6 +3774,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
{
walk_t gw;
gfn_t gfn;
+ uint32_t missing;
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
/* Check the vTLB cache first */
@@ -3780,10 +3783,12 @@ sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
return vtlb_gfn;
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
- if ( sh_walk_guest_tables(v, va, &gw, pfec[0]) != 0 )
+ if ( (missing = sh_walk_guest_tables(v, va, &gw, pfec[0])) != 0 )
{
- if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
+ if ( (missing & _PAGE_PRESENT) )
pfec[0] &= ~PFEC_page_present;
+ if ( missing & _PAGE_INVALID_BITS )
+ pfec[0] |= PFEC_reserved_bit;
return INVALID_GFN;
}
gfn = guest_walk_to_gfn(&gw);
diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h
index 0fbb236198..4e207ef5b2 100644
--- a/xen/include/asm-x86/guest_pt.h
+++ b/xen/include/asm-x86/guest_pt.h
@@ -204,6 +204,17 @@ guest_supports_nx(struct vcpu *v)
}
+/* Some bits are invalid in any pagetable entry. */
+#if GUEST_PAGING_LEVELS == 2
+#define _PAGE_INVALID_BITS (0)
+#elif GUEST_PAGING_LEVELS == 3
+#define _PAGE_INVALID_BITS \
+ get_pte_flags(((1ull<<63) - 1) & ~((1ull<<paddr_bits) - 1))
+#else /* GUEST_PAGING_LEVELS == 4 */
+#define _PAGE_INVALID_BITS \
+ get_pte_flags(((1ull<<52) - 1) & ~((1ull<<paddr_bits) - 1))
+#endif
+
/* Type used for recording a walk through guest pagetables. It is
* filled in by the pagetable walk function, and also used as a cache
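For concreteness, here are the raw masks these macros build before Xen
folds them through get_pte_flags(), taking paddr_bits = 36 purely as an
example: a PAE entry reserves bits 36-62 (bit 63 is NX), while a
4-level entry reserves only bits 36-51 (52-62 are software-available):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned int paddr_bits = 36;   /* example width only */

    /* PAE (3-level): bits [paddr_bits, 62] reserved; bit 63 is NX. */
    uint64_t pae = ((1ULL << 63) - 1) & ~((1ULL << paddr_bits) - 1);
    /* 4-level: bits [paddr_bits, 51]; 52-62 software, 63 is NX. */
    uint64_t l4  = ((1ULL << 52) - 1) & ~((1ULL << paddr_bits) - 1);

    printf("PAE mask:     %#018" PRIx64 "\n", pae); /* 0x7ffffff000000000 */
    printf("4-level mask: %#018" PRIx64 "\n", l4);  /* 0x000ffff000000000 */
    return 0;
}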
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index c9ae5cea03..35efc68468 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -196,6 +196,9 @@ extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
extern int opt_cpu_info;
+/* Maximum width of physical addresses supported by the hardware */
+extern unsigned int paddr_bits;
+
extern void identify_cpu(struct cpuinfo_x86 *);
extern void setup_clear_cpu_cap(unsigned int);
extern void print_cpu_info(unsigned int cpu);