diff options
author | Keir Fraser <keir@xen.org> | 2010-10-18 11:30:10 +0100 |
---|---|---|
committer | Keir Fraser <keir@xen.org> | 2010-10-18 11:30:10 +0100 |
commit | 51ce0a98c8531bb33324f6f1f8b72a9d761775ed (patch) | |
tree | e276fe081f46e67a422d6466adceb695a2fc3efb | |
parent | 92d4a8cf459ea72bf0c8300dc6f3a897f0d62633 (diff) | |
download | xen-51ce0a98c8531bb33324f6f1f8b72a9d761775ed.tar.gz xen-51ce0a98c8531bb33324f6f1f8b72a9d761775ed.tar.bz2 xen-51ce0a98c8531bb33324f6f1f8b72a9d761775ed.zip |
x86 guest pagetable walker: check for invalid bits in pagetable entries.
Some bits are reserved in x86 pagetable entries and must be zero; the
MMU should raise a pagefault if it sees them, with bit 3 set in the
error code. Xen's software pagetable walker hasn't been doing this,
which has been OK because no guest OSes actually have invalid bits set
except in error cases where things are already very wrong.
Xen's shadow pagetable code deliberately sets these bits as part of
the not-present-entry fast path, so if we're to support
shadow-on-shadow nested HVM, we need to start checking them.
Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
-rw-r--r-- | xen/arch/x86/cpu/mtrr/main.c | 9 | ||||
-rw-r--r-- | xen/arch/x86/mm/guest_walk.c | 20 | ||||
-rw-r--r-- | xen/arch/x86/mm/hap/guest_walk.c | 3 | ||||
-rw-r--r-- | xen/arch/x86/mm/shadow/multi.c | 9 | ||||
-rw-r--r-- | xen/include/asm-x86/guest_pt.h | 11 | ||||
-rw-r--r-- | xen/include/asm-x86/processor.h | 3 |
6 files changed, 40 insertions, 15 deletions
diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c
index cf7570925e..671b9966a9 100644
--- a/xen/arch/x86/cpu/mtrr/main.c
+++ b/xen/arch/x86/cpu/mtrr/main.c
@@ -600,6 +600,8 @@ struct mtrr_value {
 	unsigned long lsize;
 };
 
+unsigned int paddr_bits __read_mostly = 36;
+
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
  *
@@ -620,17 +622,16 @@ void __init mtrr_bp_init(void)
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
-			phys_addr = cpuid_eax(0x80000008) & 0xff;
+			paddr_bits = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 			    boot_cpu_data.x86 == 0xF &&
 			    boot_cpu_data.x86_model == 0x3 &&
 			    (boot_cpu_data.x86_mask == 0x3 ||
 			     boot_cpu_data.x86_mask == 0x4))
-				phys_addr = 36;
+				paddr_bits = 36;
 
-			size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+			size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
 			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
 		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
 			   boot_cpu_data.x86 == 6) {
diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c
index 576333065b..0551579e75 100644
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -63,7 +63,7 @@ static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
          && !(pfec & PFEC_user_mode) )
         pfec &= ~PFEC_write_access;
 
-    return flags[(pfec & 0x1f) >> 1];
+    return flags[(pfec & 0x1f) >> 1] | _PAGE_INVALID_BITS;
 }
 
 /* Modify a guest pagetable entry to set the Accessed and Dirty bits.
@@ -131,17 +131,19 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
     guest_l3e_t *l3p = NULL;
     guest_l4e_t *l4p;
 #endif
-    uint32_t gflags, mflags, rc = 0;
+    uint32_t gflags, mflags, iflags, rc = 0;
     int pse;
 
     perfc_incr(guest_walk);
     memset(gw, 0, sizeof(*gw));
     gw->va = va;
 
-    /* Mandatory bits that must be set in every entry. We invert NX, to
-     * calculate as if there were an "X" bit that allowed access.
-     * We will accumulate, in rc, the set of flags that are missing. */
+    /* Mandatory bits that must be set in every entry. We invert NX and
+     * the invalid bits, to calculate as if there were an "X" bit that
+     * allowed access. We will accumulate, in rc, the set of flags that
+     * are missing/unwanted. */
     mflags = mandatory_flags(v, pfec);
+    iflags = (_PAGE_NX_BIT | _PAGE_INVALID_BITS);
 
 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
@@ -150,7 +152,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
     gw->l4mfn = top_mfn;
     l4p = (guest_l4e_t *) top_map;
     gw->l4e = l4p[guest_l4_table_offset(va)];
-    gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
+    gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT ) goto out;
 
@@ -164,7 +166,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
         goto out;
     /* Get the l3e and check its flags*/
     gw->l3e = l3p[guest_l3_table_offset(va)];
-    gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+    gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
@@ -201,7 +203,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
 
 #endif /* All levels... */
 
-    gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
+    gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
@@ -246,7 +248,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
         if(l1p == NULL)
             goto out;
         gw->l1e = l1p[guest_l1_table_offset(va)];
-        gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+        gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
         rc |= ((gflags & mflags) ^ mflags);
     }
 
diff --git a/xen/arch/x86/mm/hap/guest_walk.c b/xen/arch/x86/mm/hap/guest_walk.c
index 82a6255e44..2fbd971438 100644
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -99,6 +99,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     if ( missing & _PAGE_PRESENT )
         pfec[0] &= ~PFEC_page_present;
 
+    if ( missing & _PAGE_INVALID_BITS )
+        pfec[0] |= PFEC_reserved_bit;
+
     if ( missing & _PAGE_PAGED )
         pfec[0] = PFEC_page_paged;
 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 09771bfc03..376cff008a 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3181,6 +3181,8 @@ static int sh_page_fault(struct vcpu *v,
         perfc_incr(shadow_fault_bail_real_fault);
         SHADOW_PRINTK("not a shadow fault\n");
         reset_early_unshadow(v);
+        if ( (rc & _PAGE_INVALID_BITS) )
+            regs->error_code |= PFEC_reserved_bit;
         goto propagate;
     }
 
@@ -3772,6 +3774,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
 {
     walk_t gw;
     gfn_t gfn;
+    uint32_t missing;
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
     /* Check the vTLB cache first */
@@ -3780,10 +3783,12 @@ sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
         return vtlb_gfn;
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
-    if ( sh_walk_guest_tables(v, va, &gw, pfec[0]) != 0 )
+    if ( (missing = sh_walk_guest_tables(v, va, &gw, pfec[0])) != 0 )
     {
-        if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
+        if ( (missing & _PAGE_PRESENT) )
             pfec[0] &= ~PFEC_page_present;
+        if ( missing & _PAGE_INVALID_BITS )
+            pfec[0] |= PFEC_reserved_bit;
         return INVALID_GFN;
     }
     gfn = guest_walk_to_gfn(&gw);
diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h
index 0fbb236198..4e207ef5b2 100644
--- a/xen/include/asm-x86/guest_pt.h
+++ b/xen/include/asm-x86/guest_pt.h
@@ -204,6 +204,17 @@ guest_supports_nx(struct vcpu *v)
 }
 
 
+/* Some bits are invalid in any pagetable entry. */
+#if GUEST_PAGING_LEVELS == 2
+#define _PAGE_INVALID_BITS (0)
+#elif GUEST_PAGING_LEVELS == 3
+#define _PAGE_INVALID_BITS \
+    get_pte_flags(((1ull<<63) - 1) & ~((1ull<<paddr_bits) - 1))
+#else /* GUEST_PAGING_LEVELS == 4 */
+#define _PAGE_INVALID_BITS \
+    get_pte_flags(((1ull<<52) - 1) & ~((1ull<<paddr_bits) - 1))
+#endif
+
 
 /* Type used for recording a walk through guest pagetables. It is
  * filled in by the pagetable walk function, and also used as a cache
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index c9ae5cea03..35efc68468 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -196,6 +196,9 @@ extern int phys_proc_id[NR_CPUS];
 extern int cpu_core_id[NR_CPUS];
 extern int opt_cpu_info;
 
+/* Maximum width of physical addresses supported by the hardware */
+extern unsigned int paddr_bits;
+
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void setup_clear_cpu_cap(unsigned int);
 extern void print_cpu_info(unsigned int cpu);