diff options
author | Tim Deegan <Tim.Deegan@xensource.com> | 2006-12-20 12:03:07 +0000 |
---|---|---|
committer | Tim Deegan <Tim.Deegan@xensource.com> | 2006-12-20 12:03:07 +0000 |
commit | 1128c254f3ddece0fc98b98f30c37c7d83b251f3 (patch) | |
tree | 04cc143e88859f7fe486f95df373dee2318348dd | |
parent | 6f7ff73d5777b3aec9b7fdadfe0869ca1df280aa (diff) | |
download | xen-1128c254f3ddece0fc98b98f30c37c7d83b251f3.tar.gz xen-1128c254f3ddece0fc98b98f30c37c7d83b251f3.tar.bz2 xen-1128c254f3ddece0fc98b98f30c37c7d83b251f3.zip |
[XEN] Clean up the shadow interface
Remove a lot of unnecessary things from shadow.h, and move the shadow lock
entirely inside the shadow code.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
-rw-r--r-- | xen/arch/x86/domain.c | 19 | ||||
-rw-r--r-- | xen/arch/x86/domain_build.c | 2 | ||||
-rw-r--r-- | xen/arch/x86/mm.c | 199 | ||||
-rw-r--r-- | xen/arch/x86/mm/shadow/common.c | 282 | ||||
-rw-r--r-- | xen/arch/x86/mm/shadow/multi.c | 53 | ||||
-rw-r--r-- | xen/arch/x86/mm/shadow/multi.h | 4 | ||||
-rw-r--r-- | xen/arch/x86/mm/shadow/private.h | 160 | ||||
-rw-r--r-- | xen/arch/x86/mm/shadow/types.h | 4 | ||||
-rw-r--r-- | xen/include/asm-x86/mm.h | 2 | ||||
-rw-r--r-- | xen/include/asm-x86/shadow.h | 468 |
10 files changed, 577 insertions, 616 deletions
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index e5897454fa..afa2f60f08 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -172,10 +172,11 @@ int arch_domain_create(struct domain *d) { #ifdef __x86_64__ struct page_info *pg; + int i; #endif l1_pgentry_t gdt_l1e; int vcpuid, pdpt_order; - int i, rc = -ENOMEM; + int rc = -ENOMEM; pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order); @@ -218,12 +219,7 @@ int arch_domain_create(struct domain *d) #endif /* __x86_64__ */ - shadow_lock_init(d); - for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); + shadow_domain_init(d); if ( !is_idle_domain(d) ) { @@ -366,15 +362,6 @@ int arch_set_info_guest( v->arch.guest_table = pagetable_from_pfn(cr3_pfn); } - /* Shadow: make sure the domain has enough shadow memory to - * boot another vcpu */ - if ( shadow_mode_enabled(d) - && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) ) - { - destroy_gdt(v); - return -ENOMEM; - } - if ( v->vcpu_id == 0 ) update_domain_wallclock_time(d); diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index c092149def..236bc5be46 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -827,7 +827,7 @@ int construct_dom0(struct domain *d, regs->eflags = X86_EFLAGS_IF; if ( opt_dom0_shadow ) - if ( shadow_test_enable(d) == 0 ) + if ( shadow_enable(d, SHM2_enable) == 0 ) shadow_update_paging_modes(v); if ( supervisor_mode_kernel ) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 9c932e9686..6ed7d84490 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -365,6 +365,38 @@ void write_ptbase(struct vcpu *v) write_cr3(v->arch.cr3); } +/* Should be called after CR3 is updated. 
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. + * + * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, + * shadow_vtable, etc). + * + * Uses values found in vcpu->arch.(guest_table and guest_table_user), and + * for HVM guests, arch.monitor_table and hvm's guest CR3. + * + * Update ref counts to shadow tables appropriately. + */ +void update_cr3(struct vcpu *v) +{ + unsigned long cr3_mfn=0; + + if ( shadow_mode_enabled(v->domain) ) + { + shadow_update_cr3(v); + return; + } + +#if CONFIG_PAGING_LEVELS == 4 + if ( !(v->arch.flags & TF_kernel_mode) ) + cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); + else +#endif + cr3_mfn = pagetable_get_pfn(v->arch.guest_table); + + make_cr3(v, cr3_mfn); +} + + void invalidate_shadow_ldt(struct vcpu *v) { int i; @@ -1160,53 +1192,57 @@ static void free_l4_table(struct page_info *page) #endif -static inline int update_l1e(l1_pgentry_t *pl1e, - l1_pgentry_t ol1e, - l1_pgentry_t nl1e, - unsigned long gl1mfn, - struct vcpu *v) + +/* How to write an entry to the guest pagetables. + * Returns 0 for failure (pointer not valid), 1 for success. 
*/ +static inline int update_intpte(intpte_t *p, + intpte_t old, + intpte_t new, + unsigned long mfn, + struct vcpu *v) { int rv = 1; - if ( unlikely(shadow_mode_enabled(v->domain)) ) - shadow_lock(v->domain); #ifndef PTE_UPDATE_WITH_CMPXCHG - rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e))); + if ( unlikely(shadow_mode_enabled(v->domain)) ) + rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); + else + rv = (!__copy_to_user(p, &new, sizeof(new))); #else { - intpte_t o = l1e_get_intpte(ol1e); - intpte_t n = l1e_get_intpte(nl1e); - + intpte_t t = old; for ( ; ; ) { - if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) + if ( unlikely(shadow_mode_enabled(v->domain)) ) + rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); + else + rv = (!cmpxchg_user(p, t, new)); + + if ( unlikely(rv == 0) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte, - l1e_get_intpte(ol1e), - l1e_get_intpte(nl1e), - o); - rv = 0; + ": saw %" PRIpte, old, new, t); break; } - if ( o == l1e_get_intpte(ol1e) ) + if ( t == old ) break; /* Allowed to change in Accessed/Dirty flags only. */ - BUG_ON((o ^ l1e_get_intpte(ol1e)) & - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); - ol1e = l1e_from_intpte(o); + BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY)); + + old = t; } } #endif - if ( unlikely(shadow_mode_enabled(v->domain)) && rv ) - { - shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e); - shadow_unlock(v->domain); - } return rv; } +/* Macro that wraps the appropriate type-changes around update_intpte(). + * Arguments are: type, ptr, old, new, mfn, vcpu */ +#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \ + update_intpte((intpte_t *)(_p), \ + _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ + (_m), (_v)) /* Update the L1 entry at pl1e to new value nl1e. 
*/ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, @@ -1219,7 +1255,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return 0; if ( unlikely(shadow_mode_refcounts(d)) ) - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { @@ -1238,12 +1274,12 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, /* Fast path for identical mapping, r/w and presence. */ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) - return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); + return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) return 0; - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) { put_page_from_l1e(nl1e, d); return 0; @@ -1251,7 +1287,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, } else { - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) ) return 0; } @@ -1259,36 +1295,6 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return 1; } -#ifndef PTE_UPDATE_WITH_CMPXCHG -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) -#else -#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \ - for ( ; ; ) \ - { \ - intpte_t __o = cmpxchg((intpte_t *)(_p), \ - _t ## e_get_intpte(_o), \ - _t ## e_get_intpte(_n)); \ - if ( __o == _t ## e_get_intpte(_o) ) \ - break; \ - /* Allowed to change in Accessed/Dirty flags only. 
*/ \ - BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \ - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \ - _o = _t ## e_from_intpte(__o); \ - } \ - 1; }) -#endif -#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \ - int rv; \ - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ - shadow_lock(current->domain); \ - rv = _UPDATE_ENTRY(_t, _p, _o, _n); \ - if ( unlikely(shadow_mode_enabled(current->domain)) ) \ - { \ - shadow_validate_guest_entry(current, _mfn(_m), (_p)); \ - shadow_unlock(current->domain); \ - } \ - rv; \ -}) /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, @@ -1320,18 +1326,18 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, /* Fast path for identical mapping and presence. */ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); + return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current); if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) { put_page_from_l2e(nl2e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) { return 0; } @@ -1381,18 +1387,18 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, /* Fast path for identical mapping and presence. 
*/ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); + return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current); if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) { put_page_from_l3e(nl3e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) { return 0; } @@ -1439,18 +1445,18 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, /* Fast path for identical mapping and presence. */ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); + return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current); if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) + if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) { put_page_from_l4e(nl4e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) + else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) ) { return 0; } @@ -2292,15 +2298,11 @@ int do_mmu_update( break; if ( unlikely(shadow_mode_enabled(d)) ) - shadow_lock(d); - - *(intpte_t *)va = req.val; - okay = 1; - - if ( unlikely(shadow_mode_enabled(d)) ) + okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); + else { - shadow_validate_guest_entry(v, _mfn(mfn), va); - shadow_unlock(d); + *(intpte_t *)va = req.val; + okay = 1; } put_page_type(page); @@ -2409,7 +2411,7 @@ static int create_grant_pte_mapping( } ol1e = *(l1_pgentry_t *)va; - if ( !update_l1e(va, ol1e, nl1e, mfn, v) ) + if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) ) { put_page_type(page); rc = GNTST_general_error; @@ -2477,7 +2479,7 @@ static int destroy_grant_pte_mapping( } /* Delete pagetable entry. 
*/ - if ( unlikely(!update_l1e( + if ( unlikely(!UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) ) { @@ -2515,7 +2517,7 @@ static int create_grant_va_mapping( return GNTST_general_error; } ol1e = *pl1e; - okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v); + okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v); guest_unmap_l1e(v, pl1e); pl1e = NULL; @@ -2553,7 +2555,7 @@ static int destroy_grant_va_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) ) { MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); rc = GNTST_general_error; @@ -2952,16 +2954,6 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) UNLOCK_BIGLOCK(d); - /* If we're doing FAST_FAULT_PATH, then shadow mode may have - cached the fact that this is an mmio region in the shadow - page tables. Blow the tables away to remove the cache. - This is pretty heavy handed, but this is a rare operation - (it might happen a dozen times during boot and then never - again), so it doesn't matter too much. 
*/ - shadow_lock(d); - shadow_blow_tables(d); - shadow_unlock(d); - put_domain(d); break; @@ -3188,27 +3180,30 @@ static int ptwr_emulated_update( pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); if ( do_cmpxchg ) { - if ( shadow_mode_enabled(d) ) - shadow_lock(d); + int okay; ol1e = l1e_from_intpte(old); - if ( cmpxchg((intpte_t *)pl1e, old, val) != old ) + + if ( shadow_mode_enabled(d) ) + { + intpte_t t = old; + okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, + &t, val, _mfn(mfn)); + okay = (okay && t == old); + } + else + okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); + + if ( !okay ) { - if ( shadow_mode_enabled(d) ) - shadow_unlock(d); unmap_domain_page(pl1e); put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d); return X86EMUL_CMPXCHG_FAILED; } - if ( unlikely(shadow_mode_enabled(d)) ) - { - shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); - shadow_unlock(d); - } } else { ol1e = *pl1e; - if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) ) + if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) ) BUG(); } diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c index 4bb8e3b230..b4275bf8e0 100644 --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -38,6 +38,21 @@ #include <asm/shadow.h> #include "private.h" + +/* Set up the shadow-specific parts of a domain struct at start of day. 
+ * Called for every domain from arch_domain_create() */ +void shadow_domain_init(struct domain *d) +{ + int i; + shadow_lock_init(d); + for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) + INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); + INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); + INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); + INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); +} + + #if SHADOW_AUDIT int shadow_audit_enable = 0; @@ -434,7 +449,7 @@ void shadow_promote(struct vcpu *v, mfn_t gmfn, unsigned int type) ASSERT(mfn_valid(gmfn)); /* We should never try to promote a gmfn that has writeable mappings */ - ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0); + ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0); /* Is the page already shadowed? */ if ( !test_and_set_bit(_PGC_page_table, &page->count_info) ) @@ -466,8 +481,7 @@ void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type) * Returns a bitmask of SHADOW_SET_* flags. */ int -__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size) +sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size) { int result = 0; struct page_info *page = mfn_to_page(gmfn); @@ -546,22 +560,9 @@ __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, } -int -shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry) -/* This is the entry point from hypercalls. It returns a bitmask of all the - * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */ -{ - int rc; - - ASSERT(shadow_locked_by_me(v->domain)); - rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t)); - shadow_audit_tables(v); - return rc; -} - void -shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size) +sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size) /* This is the entry point for emulated writes to pagetables in HVM guests and * PV translated guests. 
*/ @@ -570,7 +571,7 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, int rc; ASSERT(shadow_locked_by_me(v->domain)); - rc = __shadow_validate_guest_entry(v, gmfn, entry, size); + rc = sh_validate_guest_entry(v, gmfn, entry, size); if ( rc & SHADOW_SET_FLUSH ) /* Need to flush TLBs to pick up shadow PT changes */ flush_tlb_mask(d->domain_dirty_cpumask); @@ -585,6 +586,38 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, } } +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn) +/* Write a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 for success. */ +{ + int failed; + shadow_lock(v->domain); + failed = __copy_to_user(p, &new, sizeof(new)); + if ( failed != sizeof(new) ) + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); + shadow_unlock(v->domain); + return (failed == 0); +} + +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn) +/* Cmpxchg a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 if not. + * N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +{ + int failed; + intpte_t t = *old; + shadow_lock(v->domain); + failed = cmpxchg_user(p, t, new); + if ( t == *old ) + sh_validate_guest_entry(v, gmfn, p, sizeof(new)); + *old = t; + shadow_unlock(v->domain); + return (failed == 0); +} + /**************************************************************************/ /* Memory management for shadow pages. 
*/ @@ -791,7 +824,7 @@ void shadow_prealloc(struct domain *d, unsigned int order) /* Deliberately free all the memory we can: this will tear down all of * this domain's shadows */ -void shadow_blow_tables(struct domain *d) +static void shadow_blow_tables(struct domain *d) { struct list_head *l, *t; struct shadow_page_info *sp; @@ -989,7 +1022,7 @@ void shadow_free(struct domain *d, mfn_t smfn) * Also, we only ever allocate a max-order chunk, so as to preserve * the invariant that shadow_prealloc() always works. * Returns 0 iff it can't get a chunk (the caller should then - * free up some pages in domheap and call set_sh_allocation); + * free up some pages in domheap and call sh_set_allocation); * returns non-zero on success. */ static int @@ -1149,14 +1182,14 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, if ( pagetable_get_pfn(v->arch.guest_table) == pagetable_get_pfn(d->arch.phys_table) && v->arch.shadow.mode != NULL ) - v->arch.shadow.mode->update_cr3(v); + v->arch.shadow.mode->update_cr3(v, 0); } } #endif /* The P2M can be shadowed: keep the shadows synced */ if ( d->vcpu[0] != NULL ) - (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn, - p2m_entry, sizeof *p2m_entry); + (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, + p2m_entry, sizeof *p2m_entry); } *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); next = sh_map_domain_page(*table_mfn); @@ -1216,8 +1249,8 @@ shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) /* The P2M can be shadowed: keep the shadows synced */ if ( d->vcpu[0] != NULL ) - (void)__shadow_validate_guest_entry( - d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry)); + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, + p2m_entry, sizeof(*p2m_entry)); /* Success */ rv = 1; @@ -1427,9 +1460,9 @@ static void shadow_p2m_teardown(struct domain *d) * Input will be rounded up to at least shadow_min_acceptable_pages(), * plus space for the p2m table. * Returns 0 for success, non-zero for failure. 
*/ -static unsigned int set_sh_allocation(struct domain *d, - unsigned int pages, - int *preempted) +static unsigned int sh_set_allocation(struct domain *d, + unsigned int pages, + int *preempted) { struct shadow_page_info *sp; unsigned int lower_bound; @@ -1499,20 +1532,12 @@ static unsigned int set_sh_allocation(struct domain *d, return 0; } -unsigned int shadow_set_allocation(struct domain *d, - unsigned int megabytes, - int *preempted) -/* Hypercall interface to set the shadow memory allocation */ +/* Return the size of the shadow pool, rounded up to the nearest MB */ +static unsigned int shadow_get_allocation(struct domain *d) { - unsigned int rv; - shadow_lock(d); - rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); - SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n", - d->domain_id, - d->arch.shadow.total_pages, - shadow_get_allocation(d)); - shadow_unlock(d); - return rv; + unsigned int pg = d->arch.shadow.total_pages; + return ((pg >> (20 - PAGE_SHIFT)) + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 
1 : 0)); } /**************************************************************************/ @@ -1889,24 +1914,24 @@ void sh_destroy_shadow(struct vcpu *v, mfn_t smfn) * level and fault_addr desribe how we found this to be a pagetable; * level==0 means we have some other reason for revoking write access.*/ -int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, - unsigned int level, - unsigned long fault_addr) +int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, + unsigned int level, + unsigned long fault_addr) { /* Dispatch table for getting per-type functions */ static hash_callback_t callbacks[16] = { NULL, /* none */ #if CONFIG_PAGING_LEVELS == 2 - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */ #else - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */ #endif NULL, /* l2_32 */ #if CONFIG_PAGING_LEVELS >= 3 - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */ #else NULL, /* l1_pae */ NULL, /* fl1_pae */ @@ -1914,8 +1939,8 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, NULL, /* l2_pae */ NULL, /* l2h_pae */ #if CONFIG_PAGING_LEVELS >= 4 - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */ - SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */ #else NULL, /* 
l1_64 */ NULL, /* fl1_64 */ @@ -2077,25 +2102,25 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, /* Remove all mappings of a guest frame from the shadow tables. * Returns non-zero if we need to flush TLBs. */ -int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) +int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn) { struct page_info *page = mfn_to_page(gmfn); - int expected_count; + int expected_count, do_locking; /* Dispatch table for getting per-type functions */ static hash_callback_t callbacks[16] = { NULL, /* none */ #if CONFIG_PAGING_LEVELS == 2 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */ #else - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */ #endif NULL, /* l2_32 */ #if CONFIG_PAGING_LEVELS >= 3 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */ #else NULL, /* l1_pae */ NULL, /* fl1_pae */ @@ -2103,8 +2128,8 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) NULL, /* l2_pae */ NULL, /* l2h_pae */ #if CONFIG_PAGING_LEVELS >= 4 - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */ - SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */ + SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */ #else NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -2129,7 +2154,12 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) 
if ( (page->count_info & PGC_count_mask) == 0 ) return 0; - ASSERT(shadow_locked_by_me(v->domain)); + /* Although this is an externally visible function, we do not know + * whether the shadow lock will be held when it is called (since it + * can be called via put_page_type when we clear a shadow l1e). + * If the lock isn't held, take it for the duration of the call. */ + do_locking = !shadow_locked_by_me(v->domain); + if ( do_locking ) shadow_lock(v->domain); /* XXX TODO: * Heuristics for finding the (probably) single mapping of this gmfn */ @@ -2154,6 +2184,8 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn) } } + if ( do_locking ) shadow_unlock(v->domain); + /* We killed at least one mapping, so must flush TLBs. */ return 1; } @@ -2236,9 +2268,10 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) * (all != 0 implies fast == 0) */ { - struct page_info *pg; + struct page_info *pg = mfn_to_page(gmfn); mfn_t smfn; u32 sh_flags; + int do_locking; unsigned char t; /* Dispatch table for getting per-type functions: each level must @@ -2296,15 +2329,19 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) 0 /* unused */ }; - ASSERT(shadow_locked_by_me(v->domain)); ASSERT(!(all && fast)); - pg = mfn_to_page(gmfn); - /* Bail out now if the page is not shadowed */ if ( (pg->count_info & PGC_page_table) == 0 ) return; + /* Although this is an externally visible function, we do not know + * whether the shadow lock will be held when it is called (since it + * can be called via put_page_type when we clear a shadow l1e). + * If the lock isn't held, take it for the duration of the call. 
*/ + do_locking = !shadow_locked_by_me(v->domain); + if ( do_locking ) shadow_lock(v->domain); + SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n", v->domain->domain_id, v->vcpu_id, mfn_x(gmfn)); @@ -2356,14 +2393,16 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) /* Need to flush TLBs now, so that linear maps are safe next time we * take a fault. */ flush_tlb_mask(v->domain->domain_dirty_cpumask); + + if ( do_locking ) shadow_unlock(v->domain); } -void -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) +static void +sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) /* Even harsher: this is a HVM page that we thing is no longer a pagetable. * Unshadow it, and recursively unshadow pages that reference it. */ { - shadow_remove_all_shadows(v, gmfn); + sh_remove_shadows(v, gmfn, 0, 1); /* XXX TODO: * Rework this hashtable walker to return a linked-list of all * the shadows it modified, then do breadth-first recursion @@ -2376,7 +2415,7 @@ shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn) /**************************************************************************/ -void sh_update_paging_modes(struct vcpu *v) +static void sh_update_paging_modes(struct vcpu *v) { struct domain *d = v->domain; struct shadow_paging_mode *old_mode = v->arch.shadow.mode; @@ -2394,7 +2433,8 @@ void sh_update_paging_modes(struct vcpu *v) // First, tear down any old shadow tables held by this vcpu. 
// - shadow_detach_old_tables(v); + if ( v->arch.shadow.mode ) + v->arch.shadow.mode->detach_old_tables(v); if ( !is_hvm_domain(d) ) { @@ -2402,10 +2442,9 @@ void sh_update_paging_modes(struct vcpu *v) /// PV guest /// #if CONFIG_PAGING_LEVELS == 4 - if ( pv_32bit_guest(v) ) - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3); - else - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); + /* When 32-on-64 PV guests are supported, they must choose + * a different mode here */ + v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); #elif CONFIG_PAGING_LEVELS == 3 v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #elif CONFIG_PAGING_LEVELS == 2 @@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu *v) if ( pagetable_is_null(v->arch.monitor_table) ) { - mfn_t mmfn = shadow_make_monitor_table(v); + mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(mmfn); make_cr3(v, mfn_x(mmfn)); hvm_update_host_cr3(v); @@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu *v) old_mfn = pagetable_get_mfn(v->arch.monitor_table); v->arch.monitor_table = pagetable_null(); - new_mfn = v->arch.shadow.mode->make_monitor_table(v); + new_mfn = v->arch.shadow.mode->make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(new_mfn); SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", mfn_x(new_mfn)); @@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu *v) // This *does* happen, at least for CR4.PGE... 
} - v->arch.shadow.mode->update_cr3(v); + v->arch.shadow.mode->update_cr3(v, 0); +} + +void shadow_update_paging_modes(struct vcpu *v) +{ + shadow_lock(v->domain); + sh_update_paging_modes(v); + shadow_unlock(v->domain); } /**************************************************************************/ @@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 mode) /* Init the shadow memory allocation if the user hasn't done so */ old_pages = d->arch.shadow.total_pages; if ( old_pages == 0 ) - if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ + if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ { - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); rv = -ENOMEM; goto out; } @@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 mode) /* Init the hash table */ if ( shadow_hash_alloc(d) != 0 ) { - set_sh_allocation(d, old_pages, NULL); + sh_set_allocation(d, old_pages, NULL); rv = -ENOMEM; goto out; } @@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 mode) if ( !shadow_alloc_p2m_table(d) ) { shadow_hash_teardown(d); - set_sh_allocation(d, old_pages, NULL); + sh_set_allocation(d, old_pages, NULL); shadow_p2m_teardown(d); rv = -ENOMEM; goto out; @@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d) /* Release the shadow and monitor tables held by each vcpu */ for_each_vcpu(d, v) { - shadow_detach_old_tables(v); - if ( shadow_mode_external(d) ) + if ( v->arch.shadow.mode ) { - mfn = pagetable_get_mfn(v->arch.monitor_table); - if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) - shadow_destroy_monitor_table(v, mfn); - v->arch.monitor_table = pagetable_null(); + v->arch.shadow.mode->detach_old_tables(v); + if ( shadow_mode_external(d) ) + { + mfn = pagetable_get_mfn(v->arch.monitor_table); + if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) + v->arch.shadow.mode->destroy_monitor_table(v, mfn); + v->arch.monitor_table = pagetable_null(); + } } } } @@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d) 
d->arch.shadow.free_pages, d->arch.shadow.p2m_pages); /* Destroy all the shadows and release memory to domheap */ - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); /* Release the hash table back to xenheap */ if (d->arch.shadow.hash_table) shadow_hash_teardown(d); @@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct domain *d, u32 mode) if ( d->arch.shadow.mode == 0 ) { /* Init the shadow memory allocation and the hash table */ - if ( set_sh_allocation(d, 1, NULL) != 0 + if ( sh_set_allocation(d, 1, NULL) != 0 || shadow_hash_alloc(d) != 0 ) { - set_sh_allocation(d, 0, NULL); + sh_set_allocation(d, 0, NULL); return -ENOMEM; } } @@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) d->arch.shadow.p2m_pages); for_each_vcpu(d, v) { - shadow_detach_old_tables(v); + if ( v->arch.shadow.mode ) + v->arch.shadow.mode->detach_old_tables(v); #if CONFIG_PAGING_LEVELS == 4 if ( !(v->arch.flags & TF_kernel_mode) ) make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); @@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) } /* Pull down the memory allocation */ - if ( set_sh_allocation(d, 0, NULL) != 0 ) + if ( sh_set_allocation(d, 0, NULL) != 0 ) { // XXX - How can this occur? 
// Seems like a bug to return an error now that we've @@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode) } /* Enable/disable ops for the "test" and "log-dirty" modes */ -int shadow_test_enable(struct domain *d) +static int shadow_test_enable(struct domain *d) { int ret; @@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d) return ret; } -int shadow_test_disable(struct domain *d) +static int shadow_test_disable(struct domain *d) { int ret; @@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) if ( v != NULL ) { - shadow_remove_all_shadows_and_parents(v, _mfn(mfn)); - if ( shadow_remove_all_mappings(v, _mfn(mfn)) ) + sh_remove_all_shadows_and_parents(v, _mfn(mfn)); + if ( sh_remove_all_mappings(v, _mfn(mfn)) ) flush_tlb_mask(d->domain_dirty_cpumask); } @@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, v = d->vcpu[0]; if ( v != NULL ) { - shadow_remove_all_shadows_and_parents(v, omfn); - if ( shadow_remove_all_mappings(v, omfn) ) + sh_remove_all_shadows_and_parents(v, omfn); + if ( sh_remove_all_mappings(v, omfn) ) flush_tlb_mask(d->domain_dirty_cpumask); } set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); @@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, shadow_set_p2m_entry(d, gfn, _mfn(mfn)); set_gpfn_from_mfn(mfn, gfn); + +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) + /* If we're doing FAST_FAULT_PATH, then shadow mode may have + cached the fact that this is an mmio region in the shadow + page tables. Blow the tables away to remove the cache. + This is pretty heavy handed, but this is a rare operation + (it might happen a dozen times during boot and then never + again), so it doesn't matter too much. 
*/ + shadow_blow_tables(d); +#endif + shadow_audit_p2m(d); shadow_unlock(d); } @@ -3130,14 +3191,13 @@ static int shadow_log_dirty_op( /* Mark a page as dirty */ -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn) +void sh_mark_dirty(struct domain *d, mfn_t gmfn) { unsigned long pfn; ASSERT(shadow_locked_by_me(d)); - ASSERT(shadow_mode_log_dirty(d)); - if ( !mfn_valid(gmfn) ) + if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) return; ASSERT(d->arch.shadow.dirty_bitmap != NULL); @@ -3181,13 +3241,19 @@ void sh_do_mark_dirty(struct domain *d, mfn_t gmfn) } } +void shadow_mark_dirty(struct domain *d, mfn_t gmfn) +{ + shadow_lock(d); + sh_mark_dirty(d, gmfn); + shadow_unlock(d); +} /**************************************************************************/ /* Shadow-control XEN_DOMCTL dispatcher */ int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { int rc, preempted = 0; @@ -3233,7 +3299,9 @@ int shadow_domctl(struct domain *d, return 0; case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: - rc = shadow_set_allocation(d, sc->mb, &preempted); + shadow_lock(d); + rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); + shadow_unlock(d); if ( preempted ) /* Not finished. Set up to re-run the call. */ rc = hypercall_create_continuation( diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c index 32af022668..5aabce5469 100644 --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e)); if ( !mfn_valid(gw->l3mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. 
*/ - if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) + if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn)) + guest_l3_table_offset(va); @@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e)); if ( !mfn_valid(gw->l2mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. */ - if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) + if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn)) + guest_l2_table_offset(va); @@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op) if ( !mfn_valid(gw->l1mfn) ) return 1; /* This mfn is a pagetable: make sure the guest can't write to it. */ if ( guest_op - && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) + && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn)) + guest_l1_table_offset(va); @@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu *v, u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask; /* More than one type bit set in shadow-flags? */ if ( shflags & ~(1UL << find_first_set_bit(shflags)) ) - res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep)); + res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep)); } /* We should never need to flush the TLB or recopy PAE entries */ @@ -2847,7 +2847,7 @@ static int sh_page_fault(struct vcpu *v, /* If this is actually a page table, then we have a bug, and need * to support more operations in the emulator. More likely, * though, this is a hint that this page should not be shadowed. 
*/ - shadow_remove_all_shadows(v, gmfn); + sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */); } /* Emulator has changed the user registers: write back */ @@ -3080,7 +3080,7 @@ sh_update_linear_entries(struct vcpu *v) sh_unmap_domain_page(ml4e); } - /* Shadow l3 tables are made up by update_cr3 */ + /* Shadow l3 tables are made up by sh_update_cr3 */ sl3e = v->arch.shadow.l3table; for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) @@ -3118,7 +3118,7 @@ sh_update_linear_entries(struct vcpu *v) int unmap_l2e = 0; #if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables were built by update_cr3 */ + /* Shadow l3 tables were built by sh_update_cr3 */ if ( shadow_mode_external(d) ) shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; else @@ -3341,12 +3341,15 @@ sh_set_toplevel_shadow(struct vcpu *v, static void -sh_update_cr3(struct vcpu *v) +sh_update_cr3(struct vcpu *v, int do_locking) /* Updates vcpu->arch.cr3 after the guest has changed CR3. * Paravirtual guests should set v->arch.guest_table (and guest_table_user, * if appropriate). * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works, * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards. + * If do_locking != 0, assume we are being called from outside the + * shadow code, and must take and release the shadow lock; otherwise + * that is the caller's respnsibility. 
*/ { struct domain *d = v->domain; @@ -3355,6 +3358,15 @@ sh_update_cr3(struct vcpu *v) u32 guest_idx=0; #endif + /* Don't do anything on an uninitialised vcpu */ + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + { + ASSERT(v->arch.cr3 == 0); + return; + } + + if ( do_locking ) shadow_lock(v->domain); + ASSERT(shadow_locked_by_me(v->domain)); ASSERT(v->arch.shadow.mode); @@ -3400,11 +3412,6 @@ sh_update_cr3(struct vcpu *v) #endif gmfn = pagetable_get_mfn(v->arch.guest_table); - if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) - { - ASSERT(v->arch.cr3 == 0); - return; - } //// //// vcpu->arch.guest_vtable @@ -3466,7 +3473,7 @@ sh_update_cr3(struct vcpu *v) * replace the old shadow pagetable(s), so that we can safely use the * (old) shadow linear maps in the writeable mapping heuristics. */ #if GUEST_PAGING_LEVELS == 2 - if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 ) + if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); #elif GUEST_PAGING_LEVELS == 3 @@ -3484,7 +3491,7 @@ sh_update_cr3(struct vcpu *v) { gl2gfn = guest_l3e_get_gfn(gl3e[i]); gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn); - flush |= shadow_remove_write_access(v, gl2mfn, 2, 0); + flush |= sh_remove_write_access(v, gl2mfn, 2, 0); } } if ( flush ) @@ -3506,7 +3513,7 @@ sh_update_cr3(struct vcpu *v) } } #elif GUEST_PAGING_LEVELS == 4 - if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 ) + if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); #else @@ -3582,6 +3589,9 @@ sh_update_cr3(struct vcpu *v) /* Fix up the linear pagetable mappings */ sh_update_linear_entries(v); + + /* Release the lock, if we took it (otherwise it's the caller's problem) */ + if ( do_locking ) shadow_unlock(v->domain); } @@ -3637,7 +3647,8 @@ static int sh_guess_wrmap(struct vcpu *v, 
unsigned long vaddr, mfn_t gmfn) } #endif -int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn) +int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn, + mfn_t readonly_mfn) /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */ { shadow_l1e_t *sl1e; @@ -3668,7 +3679,7 @@ int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn) } -int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) +int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn) /* Excises all mappings to guest frame from this shadow l1 table */ { shadow_l1e_t *sl1e; @@ -3888,7 +3899,7 @@ sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src, skip = safe_not_to_verify_write(mfn, addr, src, bytes); memcpy(addr, src, bytes); - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) @@ -3933,7 +3944,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, if ( prev == old ) { - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); } else rv = X86EMUL_CMPXCHG_FAILED; @@ -3977,7 +3988,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, if ( prev == old ) { - if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, 8); + if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8); } else rv = X86EMUL_CMPXCHG_FAILED; diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h index 2cc61b830f..97903059d4 100644 --- a/xen/arch/x86/mm/shadow/multi.h +++ b/xen/arch/x86/mm/shadow/multi.h @@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl4mfn); extern int -SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, 
GUEST_LEVELS) +SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn); extern int -SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS) +SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS) (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn); extern void diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h index 62a6364c39..913fa43770 100644 --- a/xen/arch/x86/mm/shadow/private.h +++ b/xen/arch/x86/mm/shadow/private.h @@ -33,8 +33,43 @@ /****************************************************************************** + * Levels of self-test and paranoia + */ + +#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ +#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ +#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ +#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ +#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ +#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ + +#ifdef NDEBUG +#define SHADOW_AUDIT 0 +#define SHADOW_AUDIT_ENABLE 0 +#else +#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. 
*/ +#define SHADOW_AUDIT_ENABLE shadow_audit_enable +extern int shadow_audit_enable; +#endif + +/****************************************************************************** + * Levels of optimization + */ + +#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ +#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ +#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ +#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ +#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ +#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ + +#define SHADOW_OPTIMIZATIONS 0x3f + + +/****************************************************************************** * Debug and error-message output */ + #define SHADOW_PRINTK(_f, _a...) \ debugtrace_printk("sh: %s(): " _f, __func__, ##_a) #define SHADOW_ERROR(_f, _a...) \ @@ -54,6 +89,58 @@ #define SHADOW_DEBUG_EMULATE 1 #define SHADOW_DEBUG_LOGDIRTY 0 +/****************************************************************************** + * The shadow lock. + * + * This lock is per-domain. It is intended to allow us to make atomic + * updates to the software TLB that the shadow tables provide. 
+ * + * Specifically, it protects: + * - all changes to shadow page table pages + * - the shadow hash table + * - the shadow page allocator + * - all changes to guest page table pages + * - all changes to the page_info->tlbflush_timestamp + * - the page_info->count fields on shadow pages + * - the shadow dirty bit array and count + */ +#ifndef CONFIG_SMP +#error shadow.h currently requires CONFIG_SMP +#endif + +#define shadow_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.shadow.lock); \ + (_d)->arch.shadow.locker = -1; \ + (_d)->arch.shadow.locker_function = "nobody"; \ + } while (0) + +#define shadow_locked_by_me(_d) \ + (current->processor == (_d)->arch.shadow.locker) + +#define shadow_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ + { \ + printk("Error: shadow lock held by %s\n", \ + (_d)->arch.shadow.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.shadow.lock); \ + ASSERT((_d)->arch.shadow.locker == -1); \ + (_d)->arch.shadow.locker = current->processor; \ + (_d)->arch.shadow.locker_function = __func__; \ + } while (0) + +#define shadow_unlock(_d) \ + do { \ + ASSERT((_d)->arch.shadow.locker == current->processor); \ + (_d)->arch.shadow.locker = -1; \ + (_d)->arch.shadow.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.shadow.lock); \ + } while (0) + + /****************************************************************************** * Auditing routines @@ -291,6 +378,21 @@ void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn); void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn); void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); +/* Update the shadows in response to a pagetable write from Xen */ +extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); + +/* Update the shadows in response to a pagetable write from a HVM guest */ +extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + 
void *entry, u32 size); + +/* Remove all writeable mappings of a guest frame from the shadows. + * Returns non-zero if we need to flush TLBs. + * level and fault_addr desribe how we found this to be a pagetable; + * level==0 means we have some other reason for revoking write access. */ +extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, + unsigned int level, + unsigned long fault_addr); /****************************************************************************** * Flags used in the return value of the shadow_set_lXe() functions... @@ -325,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); #undef mfn_valid #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) + +static inline int +sh_mfn_is_a_page_table(mfn_t gmfn) +{ + struct page_info *page = mfn_to_page(gmfn); + struct domain *owner; + unsigned long type_info; + + if ( !mfn_valid(gmfn) ) + return 0; + + owner = page_get_owner(page); + if ( owner && shadow_mode_refcounts(owner) + && (page->count_info & PGC_page_table) ) + return 1; + + type_info = page->u.inuse.type_info & PGT_type_mask; + return type_info && (type_info <= PGT_l4_page_table); +} + // Provide mfn_t-aware versions of common xen functions static inline void * sh_map_domain_page(mfn_t mfn) @@ -350,6 +472,25 @@ sh_unmap_domain_page_global(void *p) unmap_domain_page_global(p); } +static inline mfn_t +pagetable_get_mfn(pagetable_t pt) +{ + return _mfn(pagetable_get_pfn(pt)); +} + +static inline pagetable_t +pagetable_from_mfn(mfn_t mfn) +{ + return pagetable_from_pfn(mfn_x(mfn)); +} + + +/****************************************************************************** + * Log-dirty mode bitmap handling + */ + +extern void sh_mark_dirty(struct domain *d, mfn_t gmfn); + static inline int sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) /* Is this guest page dirty? Call only in log-dirty mode. 
*/ @@ -368,25 +509,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) return 0; } -static inline int -sh_mfn_is_a_page_table(mfn_t gmfn) -{ - struct page_info *page = mfn_to_page(gmfn); - struct domain *owner; - unsigned long type_info; - - if ( !mfn_valid(gmfn) ) - return 0; - - owner = page_get_owner(page); - if ( owner && shadow_mode_refcounts(owner) - && (page->count_info & PGC_page_table) ) - return 1; - - type_info = page->u.inuse.type_info & PGT_type_mask; - return type_info && (type_info <= PGT_l4_page_table); -} - /**************************************************************************/ /* Shadow-page refcounting. */ diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h index 4aed70aa8c..e2edebe555 100644 --- a/xen/arch/x86/mm/shadow/types.h +++ b/xen/arch/x86/mm/shadow/types.h @@ -477,8 +477,8 @@ struct shadow_walk_t #define sh_gva_to_gpa INTERNAL_NAME(sh_gva_to_gpa) #define sh_gva_to_gfn INTERNAL_NAME(sh_gva_to_gfn) #define sh_update_cr3 INTERNAL_NAME(sh_update_cr3) -#define sh_remove_write_access INTERNAL_NAME(sh_remove_write_access) -#define sh_remove_all_mappings INTERNAL_NAME(sh_remove_all_mappings) +#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1) +#define sh_rm_mappings_from_l1 INTERNAL_NAME(sh_rm_mappings_from_l1) #define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow) #define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow) #define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow) diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 93d96df4fd..d561677fed 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -307,7 +307,7 @@ void audit_domains(void); int new_guest_cr3(unsigned long pfn); void make_cr3(struct vcpu *v, unsigned long mfn); - +void update_cr3(struct vcpu *v); void propagate_page_fault(unsigned long addr, u16 error_code); int __sync_lazy_execstate(void); diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h 
index 4b9094cb7c..46027d94ac 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -29,20 +29,8 @@ #include <xen/domain_page.h> #include <asm/flushtlb.h> -/* How to make sure a page is not referred to in a shadow PT */ -/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ -#define shadow_drop_references(_d, _p) \ - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) -#define shadow_sync_and_drop_references(_d, _p) \ - shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p))) - -/* How to add and remove entries in the p2m mapping. */ -#define guest_physmap_add_page(_d, _p, _m) \ - shadow_guest_physmap_add_page((_d), (_p), (_m)) -#define guest_physmap_remove_page(_d, _p, _m ) \ - shadow_guest_physmap_remove_page((_d), (_p), (_m)) - -/* Shadow PT operation mode : shadow-mode variable in arch_domain. */ +/***************************************************************************** + * Macros to tell which shadow paging mode a domain is in */ #define SHM2_shift 10 /* We're in one of the shadow modes */ @@ -64,107 +52,24 @@ #define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) /* Xen traps & emulates all reads of all page table pages: - * not yet supported - */ + * not yet supported */ #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; }) -// How do we tell that we have a 32-bit PV guest in a 64-bit Xen? -#ifdef __x86_64__ -#define pv_32bit_guest(_v) 0 // not yet supported -#else -#define pv_32bit_guest(_v) !is_hvm_vcpu(v) -#endif - -/* The shadow lock. - * - * This lock is per-domain. It is intended to allow us to make atomic - * updates to the software TLB that the shadow tables provide. 
- * - * Specifically, it protects: - * - all changes to shadow page table pages - * - the shadow hash table - * - the shadow page allocator - * - all changes to guest page table pages; if/when the notion of - * out-of-sync pages is added to this code, then the shadow lock is - * protecting all guest page table pages which are not listed as - * currently as both guest-writable and out-of-sync... - * XXX -- need to think about this relative to writable page tables. - * - all changes to the page_info->tlbflush_timestamp - * - the page_info->count fields on shadow pages - * - the shadow dirty bit array and count - * - XXX - */ -#ifndef CONFIG_SMP -#error shadow.h currently requires CONFIG_SMP -#endif - -#define shadow_lock_init(_d) \ - do { \ - spin_lock_init(&(_d)->arch.shadow.lock); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - } while (0) - -#define shadow_locked_by_me(_d) \ - (current->processor == (_d)->arch.shadow.locker) - -#define shadow_lock(_d) \ - do { \ - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ - { \ - printk("Error: shadow lock held by %s\n", \ - (_d)->arch.shadow.locker_function); \ - BUG(); \ - } \ - spin_lock(&(_d)->arch.shadow.lock); \ - ASSERT((_d)->arch.shadow.locker == -1); \ - (_d)->arch.shadow.locker = current->processor; \ - (_d)->arch.shadow.locker_function = __func__; \ - } while (0) - -#define shadow_unlock(_d) \ - do { \ - ASSERT((_d)->arch.shadow.locker == current->processor); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - spin_unlock(&(_d)->arch.shadow.lock); \ - } while (0) - -/* - * Levels of self-test and paranoia - * XXX should go in config files somewhere? 
- */ -#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */ -#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */ -#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ -#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ -#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ - -#ifdef NDEBUG -#define SHADOW_AUDIT 0 -#define SHADOW_AUDIT_ENABLE 0 -#else -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ -#define SHADOW_AUDIT_ENABLE shadow_audit_enable -extern int shadow_audit_enable; -#endif -/* - * Levels of optimization - * XXX should go in config files somewhere? - */ -#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */ -#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */ -#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */ -#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */ -#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */ -#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */ +/****************************************************************************** + * The equivalent for a particular vcpu of a shadowed domain. */ -#define SHADOW_OPTIMIZATIONS 0x3f +/* Is this vcpu using the P2M table to translate between GFNs and MFNs? + * + * This is true of translated HVM domains on a vcpu which has paging + * enabled. (HVM vcpus with paging disabled are using the p2m table as + * its paging table, so no translation occurs in this case.) + * It is also true for all vcpus of translated PV domains. */ +#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) -/* With shadow pagetables, the different kinds of address start +/****************************************************************************** + * With shadow pagetables, the different kinds of address start * to get get confusing. 
* * Virtual addresses are what they usually are: the addresses that are used @@ -214,38 +119,16 @@ static inline _type _name##_x(_name##_t n) { return n; } #endif TYPE_SAFE(unsigned long,mfn) -#define SH_PRI_mfn "05lx" - -static inline mfn_t -pagetable_get_mfn(pagetable_t pt) -{ - return _mfn(pagetable_get_pfn(pt)); -} - -static inline pagetable_t -pagetable_from_mfn(mfn_t mfn) -{ - return pagetable_from_pfn(mfn_x(mfn)); -} -static inline int -shadow_vcpu_mode_translate(struct vcpu *v) -{ - // Returns true if this VCPU needs to be using the P2M table to translate - // between GFNs and MFNs. - // - // This is true of translated HVM domains on a vcpu which has paging - // enabled. (HVM vcpu's with paging disabled are using the p2m table as - // its paging table, so no translation occurs in this case.) - // - // It is also true for translated PV domains. - // - return v->arch.shadow.translate_enabled; -} +/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ +#define SH_PRI_mfn "05lx" -/**************************************************************************/ -/* Mode-specific entry points into the shadow code */ +/***************************************************************************** + * Mode-specific entry points into the shadow code. + * + * These shouldn't be used directly by callers; rather use the functions + * below which will indirect through this table as appropriate. 
*/ struct sh_emulate_ctxt; struct shadow_paging_mode { @@ -254,7 +137,7 @@ struct shadow_paging_mode { int (*invlpg )(struct vcpu *v, unsigned long va); paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); - void (*update_cr3 )(struct vcpu *v); + void (*update_cr3 )(struct vcpu *v, int do_locking); int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, void *new_guest_entry, u32 size); int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, @@ -286,35 +169,30 @@ struct shadow_paging_mode { unsigned long *gl1mfn); void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, void *eff_l1e); -#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC int (*guess_wrmap )(struct vcpu *v, unsigned long vaddr, mfn_t gmfn); -#endif /* For outsiders to tell what mode we're in */ unsigned int shadow_levels; unsigned int guest_levels; }; -static inline int shadow_guest_paging_levels(struct vcpu *v) -{ - ASSERT(v->arch.shadow.mode != NULL); - return v->arch.shadow.mode->guest_levels; -} -/**************************************************************************/ -/* Entry points into the shadow code */ +/***************************************************************************** + * Entry points into the shadow code */ -/* Enable arbitrary shadow mode. */ -int shadow_enable(struct domain *d, u32 mode); +/* Set up the shadow-specific parts of a domain struct at start of day. + * Called for every domain from arch_domain_create() */ +void shadow_domain_init(struct domain *d); -/* Turning on shadow test mode */ -int shadow_test_enable(struct domain *d); +/* Enable an arbitrary shadow mode. Call once at domain creation. */ +int shadow_enable(struct domain *d, u32 mode); -/* Handler for shadow control ops: enabling and disabling shadow modes, - * and log-dirty bitmap ops all happen through here. 
*/ +/* Handler for shadow control ops: operations from user-space to enable + * and disable ephemeral shadow modes (test mode and log-dirty mode) and + * manipulate the log-dirty bitmap. */ int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); /* Call when destroying a domain */ void shadow_teardown(struct domain *d); @@ -322,164 +200,96 @@ void shadow_teardown(struct domain *d); /* Call once all of the references to the domain have gone away */ void shadow_final_teardown(struct domain *d); - -/* Mark a page as dirty in the bitmap */ -void sh_do_mark_dirty(struct domain *d, mfn_t gmfn); +/* Mark a page as dirty in the log-dirty bitmap: called when Xen + * makes changes to guest memory on its behalf. */ +void shadow_mark_dirty(struct domain *d, mfn_t gmfn); +/* Cleaner version so we don't pepper shadow_mode tests all over the place */ static inline void mark_dirty(struct domain *d, unsigned long gmfn) { - if ( likely(!shadow_mode_log_dirty(d)) ) - return; - - shadow_lock(d); - sh_do_mark_dirty(d, _mfn(gmfn)); - shadow_unlock(d); -} - -/* Internal version, for when the shadow lock is already held */ -static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn) -{ - ASSERT(shadow_locked_by_me(d)); if ( unlikely(shadow_mode_log_dirty(d)) ) - sh_do_mark_dirty(d, gmfn); + shadow_mark_dirty(d, _mfn(gmfn)); } -static inline int -shadow_fault(unsigned long va, struct cpu_user_regs *regs) -/* Called from pagefault handler in Xen, and from the HVM trap handlers +/* Handle page-faults caused by the shadow pagetable mechanisms. + * Called from pagefault handler in Xen, and from the HVM trap handlers * for pagefaults. Returns 1 if this fault was an artefact of the * shadow code (and the guest should retry) or 0 if it is not (and the * fault should be handled elsewhere or passed to the guest). 
*/ +static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) { struct vcpu *v = current; perfc_incrc(shadow_fault); return v->arch.shadow.mode->page_fault(v, va, regs); } -static inline int -shadow_invlpg(struct vcpu *v, unsigned long va) -/* Called when the guest requests an invlpg. Returns 1 if the invlpg - * instruction should be issued on the hardware, or 0 if it's safe not - * to do so. */ +/* Handle invlpg requests on shadowed vcpus. + * Returns 1 if the invlpg instruction should be issued on the hardware, + * or 0 if it's safe not to do so. */ +static inline int shadow_invlpg(struct vcpu *v, unsigned long va) { return v->arch.shadow.mode->invlpg(v, va); } -static inline paddr_t -shadow_gva_to_gpa(struct vcpu *v, unsigned long va) -/* Called to translate a guest virtual address to what the *guest* - * pagetables would map it to. */ +/* Translate a guest virtual address to the physical address that the + * *guest* pagetables would map it to. */ +static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) { if ( unlikely(!shadow_vcpu_mode_translate(v)) ) return (paddr_t) va; return v->arch.shadow.mode->gva_to_gpa(v, va); } -static inline unsigned long -shadow_gva_to_gfn(struct vcpu *v, unsigned long va) -/* Called to translate a guest virtual address to what the *guest* - * pagetables would map it to. */ +/* Translate a guest virtual address to the frame number that the + * *guest* pagetables would map it to. */ +static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) { if ( unlikely(!shadow_vcpu_mode_translate(v)) ) return va >> PAGE_SHIFT; return v->arch.shadow.mode->gva_to_gfn(v, va); } -static inline void -shadow_update_cr3(struct vcpu *v) -/* Updates all the things that are derived from the guest's CR3. - * Called when the guest changes CR3. */ -{ - shadow_lock(v->domain); - v->arch.shadow.mode->update_cr3(v); - shadow_unlock(v->domain); -} - - -/* Should be called after CR3 is updated. 
- * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. - * - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, - * shadow_vtable, etc). - * - * Uses values found in vcpu->arch.(guest_table and guest_table_user), and - * for HVM guests, arch.monitor_table and hvm's guest CR3. - * - * Update ref counts to shadow tables appropriately. - */ -static inline void update_cr3(struct vcpu *v) +/* Update all the things that are derived from the guest's CR3. + * Called when the guest changes CR3; the caller can then use + * v->arch.cr3 as the value to load into the host CR3 to schedule this vcpu + * and v->arch.hvm_vcpu.hw_cr3 as the value to put in the vmcb/vmcs when + * entering the HVM guest. */ +static inline void shadow_update_cr3(struct vcpu *v) { - unsigned long cr3_mfn=0; - - if ( shadow_mode_enabled(v->domain) ) - { - shadow_update_cr3(v); - return; - } - -#if CONFIG_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) - cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user); - else -#endif - cr3_mfn = pagetable_get_pfn(v->arch.guest_table); - - make_cr3(v, cr3_mfn); + v->arch.shadow.mode->update_cr3(v, 1); } -extern void sh_update_paging_modes(struct vcpu *v); - -/* Should be called to initialise paging structures if the paging mode +/* Update all the things that are derived from the guest's CR0/CR3/CR4. + * Called to initialize paging structures if the paging mode * has changed, and when bringing up a VCPU for the first time. 
*/ -static inline void shadow_update_paging_modes(struct vcpu *v) -{ - ASSERT(shadow_mode_enabled(v->domain)); - shadow_lock(v->domain); - sh_update_paging_modes(v); - shadow_unlock(v->domain); -} +void shadow_update_paging_modes(struct vcpu *v); -static inline void -shadow_detach_old_tables(struct vcpu *v) -{ - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); -} -static inline mfn_t -shadow_make_monitor_table(struct vcpu *v) -{ - return v->arch.shadow.mode->make_monitor_table(v); -} - -static inline void -shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) -{ - v->arch.shadow.mode->destroy_monitor_table(v, mmfn); -} +/***************************************************************************** + * Access to the guest pagetables */ +/* Get a mapping of a PV guest's l1e for this virtual address. */ static inline void * guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn) { - if ( likely(!shadow_mode_translate(v->domain)) ) - { - l2_pgentry_t l2e; - ASSERT(!shadow_mode_external(v->domain)); - /* Find this l1e and its enclosing l1mfn in the linear map */ - if ( __copy_from_user(&l2e, - &__linear_l2_table[l2_linear_offset(addr)], - sizeof(l2_pgentry_t)) != 0 ) - return NULL; - /* Check flags that it will be safe to read the l1e */ - if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) - != _PAGE_PRESENT ) - return NULL; - *gl1mfn = l2e_get_pfn(l2e); - return &__linear_l1_table[l1_linear_offset(addr)]; - } + l2_pgentry_t l2e; - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); + if ( unlikely(shadow_mode_translate(v->domain)) ) + return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); + + /* Find this l1e and its enclosing l1mfn in the linear map */ + if ( __copy_from_user(&l2e, + &__linear_l2_table[l2_linear_offset(addr)], + sizeof(l2_pgentry_t)) != 0 ) + return NULL; + /* Check flags that it will be safe to read the l1e */ + if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) + != _PAGE_PRESENT ) + 
return NULL; + *gl1mfn = l2e_get_pfn(l2e); + return &__linear_l1_table[l1_linear_offset(addr)]; } +/* Pull down the mapping we got from guest_map_l1e() */ static inline void guest_unmap_l1e(struct vcpu *v, void *p) { @@ -487,6 +297,7 @@ guest_unmap_l1e(struct vcpu *v, void *p) unmap_domain_page(p); } +/* Read the guest's l1e that maps this address. */ static inline void guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) { @@ -503,6 +314,8 @@ guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); } +/* Read the guest's l1e that maps this address, from the kernel-mode + * pagetables. */ static inline void guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) { @@ -518,82 +331,36 @@ guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) TOGGLE_MODE(); } - -/* Validate a pagetable change from the guest and update the shadows. */ -extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry); -extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); - -/* Update the shadows in response to a pagetable write from a HVM guest */ -extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); - -/* Remove all writeable mappings of a guest frame from the shadows. - * Returns non-zero if we need to flush TLBs. - * level and fault_addr desribe how we found this to be a pagetable; - * level==0 means we have some other reason for revoking write access. */ -extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn, - unsigned int level, - unsigned long fault_addr); - -/* Remove all mappings of the guest mfn from the shadows. - * Returns non-zero if we need to flush TLBs. */ -extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); - -/* Remove all mappings from the shadows. 
*/ -extern void shadow_blow_tables(struct domain *d); - -void -shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn); -/* This is a HVM page that we thing is no longer a pagetable. - * Unshadow it, and recursively unshadow pages that reference it. */ - -/* Remove all shadows of the guest mfn. */ -extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); -static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn) +/* Write a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 for success. */ +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn); + +/* Cmpxchg a new value into the guest pagetable, and update the shadows + * appropriately. Returns 0 if we page-faulted, 1 if not. + * N.B. caller should check the value of "old" to see if the + * cmpxchg itself was successful. */ +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn); + +/* Remove all mappings of the guest page from the shadows. + * This is called from common code. It does not flush TLBs. 
*/ +int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); +static inline void +shadow_drop_references(struct domain *d, struct page_info *p) { - int was_locked = shadow_locked_by_me(v->domain); - if ( !was_locked ) - shadow_lock(v->domain); - sh_remove_shadows(v, gmfn, 0, 1); - if ( !was_locked ) - shadow_unlock(v->domain); + /* See the comment about locking in sh_remove_all_mappings */ + sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p))); } -/* Add a page to a domain */ -void -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Remove a page from a domain */ -void -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* - * Allocation of shadow pages - */ - -/* Return the minumum acceptable number of shadow pages a domain needs */ -unsigned int shadow_min_acceptable_pages(struct domain *d); - -/* Set the pool of shadow pages to the required number of MB. - * Input will be rounded up to at least min_acceptable_shadow_pages(). - * Returns 0 for success, 1 for failure. */ -unsigned int shadow_set_allocation(struct domain *d, - unsigned int megabytes, - int *preempted); - -/* Return the size of the shadow pool, rounded up to the nearest MB */ -static inline unsigned int shadow_get_allocation(struct domain *d) +/* Remove all shadows of the guest mfn. */ +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all); +static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn) { - unsigned int pg = d->arch.shadow.total_pages; - return ((pg >> (20 - PAGE_SHIFT)) - + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 
1 : 0)); + /* See the comment about locking in sh_remove_shadows */ + sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); } - /**************************************************************************/ /* Guest physmap (p2m) support * @@ -602,9 +369,20 @@ static inline unsigned int shadow_get_allocation(struct domain *d) * guests, so we steal the address space that would have normally * been used by the read-only MPT map. */ - #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) +/* Add a page to a domain's p2m table */ +void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Remove a page from a domain's p2m table */ +void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Aliases, called from common code. */ +#define guest_physmap_add_page shadow_guest_physmap_add_page +#define guest_physmap_remove_page shadow_guest_physmap_remove_page + /* Read the current domain's P2M table. */ static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) { @@ -627,8 +405,8 @@ static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) return _mfn(INVALID_MFN); } -/* Walk another domain's P2M table, mapping pages as we go */ -extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); +/* Read another domain's P2M table, mapping pages as we go */ +mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); /* General conversion function from gfn to mfn */ static inline mfn_t @@ -666,6 +444,7 @@ mmio_space(paddr_t gpa) return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); } +/* Translate the frame number held in an l1e from guest to machine */ static inline l1_pgentry_t gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) { @@ -685,4 +464,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) * indent-tabs-mode: nil * End: */ - |