Diffstat (limited to 'xen/arch/x86/shadow.c')
-rw-r--r--  xen/arch/x86/shadow.c | 4150
1 file changed, 0 insertions, 4150 deletions
diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
deleted file mode 100644
index 88e2ec8417..0000000000
--- a/xen/arch/x86/shadow.c
+++ /dev/null
@@ -1,4150 +0,0 @@
-/******************************************************************************
- * arch/x86/shadow.c
- *
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-/*
- * Jun Nakajima <jun.nakajima@intel.com>
- * Chengyuan Li <chengyuan.li@intel.com>
- *
- * Extended to support 32-bit PAE and 64-bit guests.
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/domain_page.h>
-#include <asm/shadow.h>
-#include <asm/page.h>
-#include <xen/event.h>
-#include <xen/sched.h>
-#include <xen/trace.h>
-#include <asm/shadow_64.h>
-
-/* Use this to have the compiler remove unnecessary branches */
-#define SH_L1_HAS_NEXT_PAGE (GUEST_L1_PAGETABLE_ENTRIES - L1_PAGETABLE_ENTRIES)
-
-extern void free_shadow_pages(struct domain *d);
-
-#if 0 // this code has not been updated for 32pae & 64 bit modes
-#if SHADOW_DEBUG
-static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
-#endif
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3
-static unsigned long shadow_l3_table(
- struct vcpu *v, unsigned long gpfn, unsigned long gmfn);
-#endif
-
-#if CONFIG_PAGING_LEVELS == 4
-static unsigned long shadow_l4_table(
- struct vcpu *v, unsigned long gpfn, unsigned long gmfn);
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
-static void shadow_map_into_current(struct vcpu *v,
- unsigned long va, unsigned int from, unsigned int to);
-static inline void validate_bl2e_change( struct domain *d,
- guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
-static void update_top_level_shadow(struct vcpu *v, unsigned long smfn);
-#endif
-
-/********
-
-There's a per-domain shadow table spin lock which works fine for SMP
-hosts. We don't have to worry about interrupts as no shadow operations
-happen in an interrupt context. It's probably not quite ready for SMP
-guest operation as we have to worry about synchronisation between gpte
-and spte updates. It's possible that this might only happen in a
-hypercall context, in which case we'll probably have a per-domain
-hypercall lock anyhow (at least initially).
-
-********/
-
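A minimal sketch of the locking discipline described above, using the shadow_lock()/shadow_unlock() and __shadow_sync_va() primitives that the rest of this file relies on; the function itself is illustrative only and does not appear in the file:

/* Illustrative only: every shadow operation below brackets its
 * gpte/spte manipulation with the per-domain shadow lock. */
static void example_shadow_update(struct vcpu *v, unsigned long va)
{
    struct domain *d = v->domain;

    shadow_lock(d);           /* serialise against other VCPUs' shadow ops */
    __shadow_sync_va(v, va);  /* resync any out-of-sync mapping of va first */
    /* ... read the guest pte and update the corresponding shadow pte ... */
    shadow_unlock(d);
}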
-static inline int
-shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
- unsigned long new_type)
-{
- struct page_info *page = mfn_to_page(gmfn);
- int pinned = 0, okay = 1;
-
- if ( page_out_of_sync(page) )
- {
- // Don't know how long ago this snapshot was taken.
- // Can't trust it to be recent enough.
- //
- __shadow_sync_mfn(d, gmfn);
- }
-
- if ( !shadow_mode_refcounts(d) )
- return 1;
-
- if ( unlikely(page_is_page_table(page)) )
- return 1;
-
- FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
-
- if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
- {
- FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
- __func__, gpfn, gmfn);
-#if 1 || defined(LIVE_DANGEROUSLY)
- set_bit(_PGC_page_table, &page->count_info);
- return 1;
-#endif
- return 0;
- }
-
- // To convert this page to use as a page table, the writable count
- // should now be zero. Test this by grabbing the page as a page table,
- // and then immediately releasing. This will also deal with any
- // necessary TLB flushing issues for us.
- //
- // The cruft here about pinning doesn't really work right. This
- // needs rethinking/rewriting... Need to gracefully deal with the
- // TLB flushes required when promoting a writable page, and also deal
- // with any outstanding (external) writable refs to this page (by
- // refusing to promote it). The pinning headache complicates this
- // code -- it would all get much simpler if we stop using
- // shadow_lock() and move the shadow code to BIGLOCK().
- //
- if ( unlikely(!get_page(page, d)) )
- BUG(); // XXX -- needs more thought for a graceful failure
- if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
- {
- pinned = 1;
- put_page_and_type(page);
- }
- if ( get_page_type(page, PGT_base_page_table) )
- {
- set_bit(_PGC_page_table, &page->count_info);
- put_page_type(page);
- }
- else
- {
- printk("shadow_promote: get_page_type failed "
- "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
- d->domain_id, gpfn, gmfn, new_type);
- okay = 0;
- }
-
- // Now put the type back to writable...
- if ( unlikely(!get_page_type(page, PGT_writable_page)) )
- BUG(); // XXX -- needs more thought for a graceful failure
- if ( unlikely(pinned) )
- {
- if ( unlikely(test_and_set_bit(_PGT_pinned,
- &page->u.inuse.type_info)) )
- BUG(); // hmm... someone pinned this again?
- }
- else
- put_page_and_type(page);
-
- return okay;
-}
-
-
-/*
- * Things in shadow mode that collect get_page() refs to the domain's
- * pages are:
- * - PGC_allocated takes a gen count, just like normal.
- * - A writable page can be pinned (paravirtualized guests may consider
- * these pages to be L1s or L2s, and don't know the difference).
- * Pinning a page takes a gen count (but, for domains in shadow mode,
- * it *doesn't* take a type count)
- * - CR3 grabs a ref to whatever it points at, just like normal.
- * - Shadow mode grabs an initial gen count for itself, as a placeholder
- * for whatever references will exist.
- * - Shadow PTEs that point to a page take a gen count, just like regular
- * PTEs. However, they don't get a type count, as get_page_type() is
- * hardwired to keep writable pages' counts at 1 for domains in shadow
- * mode.
- * - Whenever we shadow a page, the entry in the shadow hash grabs a
- * general ref to the page.
- * - Whenever a page goes out of sync, the out of sync entry grabs a
- * general ref to the page.
- */
-/*
- * page_info fields for pages allocated as shadow pages:
- *
- * All 32 bits of count_info are a simple count of refs to this shadow
- * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
- * c) if it's a pinned shadow root pgtable, d) outstanding out-of-sync
- * references.
- *
- * u.inuse._domain is left NULL, to prevent accidentally allowing some random
- * domain from gaining permissions to map this page.
- *
- * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
- * shadowed.
- * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
- * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
- * currently exists because this is a shadow of a root page, and we
- * don't want to let those disappear just because no CR3 is currently pointing
- * at it.
- *
- * tlbflush_timestamp holds a min & max index of valid page table entries
- * within the shadow page.
- */
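Later code packs those two indices into the single 32-bit tlbflush_timestamp field with SHADOW_ENCODE_MIN_MAX() and unpacks them with SHADOW_MIN()/SHADOW_MAX() (defined in asm/shadow.h, not in this file). A plausible encoding, stated purely as an assumption about the layout, would be one 16-bit half per index:

/* Assumed layout (the authoritative macros live in include/asm-x86/shadow.h):
 * min index in the low 16 bits, max index in the high 16 bits. */
#define EXAMPLE_ENCODE_MIN_MAX(_min, _max) (((u32)(_max) << 16) | (u16)(_min))
#define EXAMPLE_MIN(_mm)                   ((int)((u16)(_mm)))
#define EXAMPLE_MAX(_mm)                   ((int)((_mm) >> 16))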
-static inline void
-shadow_page_info_init(struct page_info *page,
- unsigned long gmfn,
- u32 psh_type)
-{
- ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
- page->u.inuse.type_info = psh_type | gmfn;
- page->count_info = 0;
- page->tlbflush_timestamp = 0;
-}
-
-static inline unsigned long
-alloc_shadow_page(struct domain *d,
- unsigned long gpfn, unsigned long gmfn,
- u32 psh_type)
-{
- struct page_info *page;
- unsigned long smfn, real_gpfn;
- int pin = 0;
- void *l1, *lp;
- u64 index = 0;
-
- // Currently, we only keep pre-zero'ed pages around for use as L1's...
- // This will change. Soon.
- //
- if ( psh_type == PGT_l1_shadow )
- {
- if ( !list_empty(&d->arch.free_shadow_frames) )
- {
- struct list_head *entry = d->arch.free_shadow_frames.next;
- page = list_entry(entry, struct page_info, list);
- list_del(entry);
- perfc_decr(free_l1_pages);
- }
- else
- {
- if ( SH_L1_HAS_NEXT_PAGE &&
- d->arch.ops->guest_paging_levels == PAGING_L2)
- {
-#if CONFIG_PAGING_LEVELS >= 3
- /*
- * For a 32-bit HVM guest, 2 shadow L1s are required to
- * simulate 1 guest L1, so we need to allocate 2 shadow L1
- * pages each time.
- *
- * --> Need to avoid alloc_domheap_pages.
- */
- page = alloc_domheap_pages(NULL, SL1_ORDER, 0);
- if (!page)
- goto no_shadow_page;
-
- l1 = map_domain_page(page_to_mfn(page));
- memset(l1, 0, PAGE_SIZE);
- unmap_domain_page(l1);
-
- l1 = map_domain_page(page_to_mfn(page + 1));
- memset(l1, 0, PAGE_SIZE);
- unmap_domain_page(l1);
-
- /* we'd like to initialize the second contiguous page here
- * and leave the first page's initialization until later */
-
- shadow_page_info_init(page+1, gmfn, psh_type);
-#else
- page = alloc_domheap_page(NULL);
- if (!page)
- goto no_shadow_page;
-
- l1 = map_domain_page(page_to_mfn(page));
- memset(l1, 0, PAGE_SIZE);
- unmap_domain_page(l1);
-#endif
- }
- else
- {
- page = alloc_domheap_page(NULL);
- if (!page)
- goto no_shadow_page;
-
- l1 = map_domain_page(page_to_mfn(page));
- memset(l1, 0, PAGE_SIZE);
- unmap_domain_page(l1);
- }
- }
- }
- else {
-#if CONFIG_PAGING_LEVELS == 2
- page = alloc_domheap_page(NULL);
-#elif CONFIG_PAGING_LEVELS >= 3
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
- psh_type == PGT_l4_shadow ) /* allocated for PAE PDP page */
- page = alloc_domheap_pages(NULL, 0, MEMF_dma);
- else if ( d->arch.ops->guest_paging_levels == PAGING_L3 &&
- (psh_type == PGT_l3_shadow || psh_type == PGT_l4_shadow) )
- page = alloc_domheap_pages(NULL, 0, MEMF_dma); /* allocated for PAE PDP page */
- else
- page = alloc_domheap_page(NULL);
-#endif
- if (!page)
- goto no_shadow_page;
-
- lp = map_domain_page(page_to_mfn(page));
- memset(lp, 0, PAGE_SIZE);
- unmap_domain_page(lp);
- }
-
- smfn = page_to_mfn(page);
-
- shadow_page_info_init(page, gmfn, psh_type);
-
- switch ( psh_type )
- {
- case PGT_l1_shadow:
- if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
- goto fail;
- perfc_incr(shadow_l1_pages);
- d->arch.shadow_page_count++;
- break;
-
- case PGT_l2_shadow:
- if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
- goto fail;
- perfc_incr(shadow_l2_pages);
- d->arch.shadow_page_count++;
- if ( PGT_l2_page_table == PGT_root_page_table )
- pin = 1;
-
- break;
-
- case PGT_l3_shadow:
- if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
- goto fail;
- perfc_incr(shadow_l3_pages);
- d->arch.shadow_page_count++;
- if ( PGT_l3_page_table == PGT_root_page_table )
- pin = 1;
- break;
-
- case PGT_l4_shadow:
- real_gpfn = gpfn & PGT_mfn_mask;
- if ( !shadow_promote(d, real_gpfn, gmfn, psh_type) )
- goto fail;
- perfc_incr(shadow_l4_pages);
- d->arch.shadow_page_count++;
- if ( PGT_l4_page_table == PGT_root_page_table )
- pin = 1;
-#if CONFIG_PAGING_LEVELS == 3 && defined(GUEST_PGENTRY_32)
- /*
- * We use PGT_l4_shadow for 2-level paging guests on PAE
- */
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- pin = 1;
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3 && defined(GUEST_32PAE)
- /*
- * We also use PGT_l4_shadow for 3-level (PAE) paging guests.
- */
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- pin = 1;
-#endif
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- index = get_cr3_idxval(current);
- break;
-
-#if CONFIG_PAGING_LEVELS >= 3
- case PGT_fl1_shadow:
- perfc_incr(shadow_l1_pages);
- d->arch.shadow_page_count++;
- break;
-#else
-
- case PGT_hl2_shadow:
- // Treat an hl2 as an L1 for purposes of promotion.
- // For external mode domains, treat them as an L2 for purposes of
- // pinning.
- //
- if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
- goto fail;
- perfc_incr(hl2_table_pages);
- d->arch.hl2_page_count++;
- if ( shadow_mode_external(d) &&
- (PGT_l2_page_table == PGT_root_page_table) )
- pin = 1;
-
- break;
-#endif
- case PGT_snapshot:
- perfc_incr(snapshot_pages);
- d->arch.snapshot_page_count++;
- break;
-
- default:
- printk("Alloc shadow: unexpected page type=%08x\n", psh_type);
- BUG();
- break;
- }
-
- // Don't add a new shadow of something that already has a snapshot.
- //
- ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
-
- set_shadow_status(d, gpfn, gmfn, smfn, psh_type, index);
-
- if ( pin )
- shadow_pin(smfn);
-
- return smfn;
-
-fail:
- FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
- gpfn, gmfn);
- if (psh_type == PGT_l1_shadow)
- {
- if (d->arch.ops->guest_paging_levels == PAGING_L2)
- {
-#if CONFIG_PAGING_LEVELS >=3
- free_domheap_pages(page, SL1_ORDER);
-#else
- free_domheap_page(page);
-#endif
- }
- else
- free_domheap_page(page);
- }
- else
- free_domheap_page(page);
-
- return 0;
-
-no_shadow_page:
- ASSERT(page == NULL);
- printk("Couldn't alloc shadow page! dom%d count=%d\n",
- d->domain_id, d->arch.shadow_page_count);
- printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
- perfc_value(shadow_l1_pages),
- perfc_value(shadow_l2_pages),
- perfc_value(hl2_table_pages),
- perfc_value(snapshot_pages));
- /* XXX FIXME: try a shadow flush to free up some memory. */
- domain_crash_synchronous();
-
- return 0;
-}
-
-#if CONFIG_PAGING_LEVELS == 2
-static unsigned long
-shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
- unsigned long smfn)
-{
- unsigned long hl2mfn;
- l1_pgentry_t *hl2;
- int limit;
-
- ASSERT(PGT_base_page_table == PGT_l2_page_table);
-
- if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
- {
- printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
- gpfn, gmfn);
- BUG(); /* XXX Deal gracefully with failure. */
- }
-
- SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
- gpfn, gmfn, smfn, hl2mfn);
- perfc_incrc(shadow_hl2_table_count);
-
- hl2 = map_domain_page(hl2mfn);
-
- if ( shadow_mode_external(d) )
- limit = L2_PAGETABLE_ENTRIES;
- else
- limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-
- memset(hl2, 0, limit * sizeof(l1_pgentry_t));
-
- if ( !shadow_mode_external(d) )
- {
- memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
- HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
-
- // Setup easy access to the GL2, SL2, and HL2 frames.
- //
- hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
- l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
- hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
- hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
- l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
- }
-
- unmap_domain_page(hl2);
-
- return hl2mfn;
-}
-
-/*
- * This could take and use a snapshot, and validate the entire page at
- * once, or it could continue to fault in entries one at a time...
- * Might be worth investigating...
- */
-static unsigned long shadow_l2_table(
- struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
-{
- unsigned long smfn;
- l2_pgentry_t *spl2e;
- struct domain *d = v->domain;
- int i;
-
- SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
-
- perfc_incrc(shadow_l2_table_count);
-
- if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
- {
- printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
- gpfn, gmfn);
- BUG(); /* XXX Deal gracefully with failure. */
- }
-
- spl2e = (l2_pgentry_t *)map_domain_page(smfn);
-
- /* Install hypervisor and 2x linear p.t. mappings. */
- if ( (PGT_base_page_table == PGT_l2_page_table) &&
- !shadow_mode_external(d) )
- {
- /*
- * We could proactively fill in PDEs for pages that are already
- * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
- * (restriction required for coherence of the accessed bit). However,
- * we tried it and it didn't help performance. This is simpler.
- */
- memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
-
- /* Install hypervisor and 2x linear p.t. mappings. */
- memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
-
- spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
-
- for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
- spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- l2e_from_page(virt_to_page(page_get_owner(mfn_to_page(gmfn))->
- arch.mm_perdomain_pt) + i,
- __PAGE_HYPERVISOR);
-
- if ( shadow_mode_translate(d) ) // NB: not external
- {
- unsigned long hl2mfn;
-
- spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
- l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
- __PAGE_HYPERVISOR);
-
- if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
- hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
-
- // shadow_mode_translate (but not external) sl2 tables hold a
- // ref to their hl2.
- //
- if ( !get_shadow_ref(hl2mfn) )
- BUG();
-
- spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
- }
- else
- spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
- }
- else
- {
- memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
- }
-
- unmap_domain_page(spl2e);
-
- SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
- return smfn;
-}
-#endif /* CONFIG_PAGING_LEVELS == 2 */
-
-static void shadow_map_l1_into_current_l2(unsigned long va)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- l1_pgentry_t *spl1e, *spl1e_next = 0;
- l2_pgentry_t sl2e;
- guest_l1_pgentry_t *gpl1e;
- guest_l2_pgentry_t gl2e = {0};
- unsigned long gl1pfn, gl1mfn, sl1mfn;
- int i, init_table = 0;
-
- __guest_get_l2e(v, va, &gl2e);
- ASSERT(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT);
- gl1pfn = l2e_get_pfn(gl2e);
-
- if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
- {
- /* This L1 is NOT already shadowed so we need to shadow it. */
- SH_VVLOG("4a: l1 not shadowed");
-
- gl1mfn = gmfn_to_mfn(d, gl1pfn);
- if ( unlikely(!VALID_MFN(gl1mfn)) )
- {
- // Attempt to use an invalid pfn as an L1 page.
- // XXX this needs to be more graceful!
- BUG();
- }
-
- if ( unlikely(!(sl1mfn =
- alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
- {
- printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
- gl1pfn, gl1mfn);
- BUG(); /* XXX Need to deal gracefully with failure. */
- }
-
- perfc_incrc(shadow_l1_table_count);
- init_table = 1;
- }
- else
- {
- /* This L1 is shadowed already, but the L2 entry is missing. */
- SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
- }
-
-#ifndef NDEBUG
- {
- l2_pgentry_t old_sl2e;
- __shadow_get_l2e(v, va, &old_sl2e);
- ASSERT(!(l2e_get_flags(old_sl2e) & _PAGE_PRESENT));
- }
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
- if ( SH_L1_HAS_NEXT_PAGE &&
- d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- /* for a 32-bit HVM guest on a 64-bit or PAE host,
- * we need to update two L2 entries each time
- */
- if ( !get_shadow_ref(sl1mfn))
- BUG();
- l2pde_general(d, &gl2e, &sl2e, sl1mfn);
- __guest_set_l2e(v, va, &gl2e);
- __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e);
- if ( !get_shadow_ref(sl1mfn+1))
- BUG();
- sl2e = l2e_empty();
- l2pde_general(d, &gl2e, &sl2e, sl1mfn+1);
- __shadow_set_l2e(v,((va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1)) + (1 << L2_PAGETABLE_SHIFT)) , &sl2e);
- } else
-#endif
- {
- if ( !get_shadow_ref(sl1mfn) )
- BUG();
- l2pde_general(d, &gl2e, &sl2e, sl1mfn);
- __guest_set_l2e(v, va, &gl2e);
- __shadow_set_l2e(v, va , &sl2e);
- }
-
- if ( init_table )
- {
- l1_pgentry_t sl1e;
- int index = guest_l1_table_offset(va);
- int min = 1, max = 0;
-
- unsigned long tmp_gmfn;
- l2_pgentry_t tmp_sl2e = {0};
- guest_l2_pgentry_t tmp_gl2e = {0};
-
- __guest_get_l2e(v, va, &tmp_gl2e);
- tmp_gmfn = gmfn_to_mfn(d, l2e_get_pfn(tmp_gl2e));
- gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn);
-
- /* If the PGT_l1_shadow has two contiguous pages */
-#if CONFIG_PAGING_LEVELS >= 3
- if ( SH_L1_HAS_NEXT_PAGE &&
- d->arch.ops->guest_paging_levels == PAGING_L2 )
- __shadow_get_l2e(v, va & ~((1UL << L2_PAGETABLE_SHIFT_32) - 1), &tmp_sl2e);
- else
-#endif
- __shadow_get_l2e(v, va, &tmp_sl2e);
-
- spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e));
-
- if ( SH_L1_HAS_NEXT_PAGE )
- spl1e_next = (l1_pgentry_t *) map_domain_page(
- (l2e_get_pfn(tmp_sl2e) + 1UL));
-
- for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
- {
- l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
- if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
- unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
- sl1e = l1e_empty();
- if ( l1e_get_flags(sl1e) == 0 )
- {
- // First copy entries from 0 until first invalid.
- // Then copy entries from index until first invalid.
- //
- if ( i < index ) {
- i = index - 1;
- continue;
- }
- break;
- }
-
- if ( SH_L1_HAS_NEXT_PAGE && i >= L1_PAGETABLE_ENTRIES )
- spl1e_next[i - L1_PAGETABLE_ENTRIES] = sl1e;
- else
- spl1e[i] = sl1e;
-
- if ( unlikely(i < min) )
- min = i;
- if ( likely(i > max) )
- max = i;
- set_guest_back_ptr(d, sl1e, sl1mfn, i);
- }
-
- mfn_to_page(sl1mfn)->tlbflush_timestamp =
- SHADOW_ENCODE_MIN_MAX(min, max);
-
- unmap_domain_page(gpl1e);
- unmap_domain_page(spl1e);
-
- if ( SH_L1_HAS_NEXT_PAGE )
- unmap_domain_page(spl1e_next);
- }
-}
-
-#if CONFIG_PAGING_LEVELS == 2
-static void
-shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- l2_pgentry_t sl2e = {0};
-
- __shadow_get_l2e(v, va, &sl2e);
- if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
- {
- /*
- * Either the L1 is not shadowed, or the shadow isn't linked into
- * the current shadow L2.
- */
- if ( create_l1_shadow )
- {
- perfc_incrc(shadow_set_l1e_force_map);
- shadow_map_l1_into_current_l2(va);
- }
- else /* check to see if it exists; if so, link it in */
- {
- l2_pgentry_t gpde = {0};
- unsigned long gl1pfn;
- unsigned long sl1mfn;
-
- __guest_get_l2e(v, va, &gpde);
-
- if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
- {
- gl1pfn = l2e_get_pfn(gpde);
- sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
- }
- else
- {
- // no shadow exists, so there's nothing to do.
- perfc_incrc(shadow_set_l1e_fail);
- return;
- }
-
- if ( sl1mfn )
- {
- perfc_incrc(shadow_set_l1e_unlinked);
- if ( !get_shadow_ref(sl1mfn) )
- BUG();
- l2pde_general(d, (guest_l2_pgentry_t *)&gpde, &sl2e, sl1mfn);
- __guest_set_l2e(v, va, &gpde);
- __shadow_set_l2e(v, va, &sl2e);
- }
- else
- {
- // no shadow exists, so there's nothing to do.
- perfc_incrc(shadow_set_l1e_fail);
- return;
- }
- }
- }
-
- __shadow_get_l2e(v, va, &sl2e);
-
- if ( shadow_mode_refcounts(d) )
- {
- l1_pgentry_t old_spte;
- __shadow_get_l1e(v, va, &old_spte);
-
- // only do the ref counting if something important changed.
- //
- if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
- {
- if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(new_spte, d) )
- new_spte = l1e_empty();
- if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
- shadow_put_page_from_l1e(old_spte, d);
- }
- }
-
- set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
- __shadow_set_l1e(v, va, &new_spte);
- shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
-}
-
-static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
-{
- struct domain *d = v->domain;
- l1_pgentry_t gpte, spte;
-
- ASSERT(shadow_mode_enabled(d));
-
- shadow_lock(d);
-
- __shadow_sync_va(v, va);
-
- // XXX mafetter: will need to think about 4MB pages...
-
- // It's not strictly necessary to update the shadow here,
- // but it might save a fault later.
- //
- /*if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
- sizeof(gpte))) {*/
- if (unlikely(!__guest_get_l1e(v, va, &gpte))) {
- perfc_incrc(shadow_invlpg_faults);
- shadow_unlock(d);
- return;
- }
- l1pte_propagate_from_guest(d, gpte, &spte);
- shadow_set_l1e(va, spte, 1);
-
- shadow_unlock(d);
-}
-#endif /* CONFIG_PAGING_LEVELS == 2 */
-
-#if CONFIG_PAGING_LEVELS >= 3
-static void shadow_set_l1e_64(
- unsigned long va, pgentry_64_t *sl1e_p,
- int create_l1_shadow)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- pgentry_64_t sle = { 0 };
- pgentry_64_t sle_up = {0};
- l1_pgentry_t old_spte;
- l1_pgentry_t sl1e = *(l1_pgentry_t *)sl1e_p;
- int i;
- unsigned long orig_va = 0;
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- /* This is for 32-bit VMX guest on 64-bit host */
- orig_va = va;
- va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1));
- }
-
- for ( i = PAGING_L4; i >= PAGING_L2; i-- )
- {
- if ( !__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i) )
- {
- sl1e = l1e_empty();
- goto out;
- }
- if ( !(entry_get_flags(sle) & _PAGE_PRESENT) )
- {
- if ( create_l1_shadow )
- {
- perfc_incrc(shadow_set_l3e_force_map);
- shadow_map_into_current(v, va, i-1, i);
- __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
- }
- }
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- if ( i < PAGING_L3 )
- shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
- }
- else
- {
- if ( i < PAGING_L4 )
- shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
- }
-
- sle_up = sle;
- }
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- va = orig_va;
- }
-
- if ( shadow_mode_refcounts(d) )
- {
- __shadow_get_l1e(v, va, &old_spte);
- if ( l1e_has_changed(old_spte, sl1e, _PAGE_RW | _PAGE_PRESENT) )
- {
- if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(sl1e, d) )
- sl1e = l1e_empty();
- if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
- put_page_from_l1e(old_spte, d);
- }
- }
-
-out:
- __shadow_set_l1e(v, va, &sl1e);
-
- shadow_update_min_max(entry_get_pfn(sle_up), guest_l1_table_offset(va));
-}
-#endif /* CONFIG_PAGING_LEVELS >= 3 */
-
-static struct out_of_sync_entry *
-shadow_alloc_oos_entry(struct domain *d)
-{
- struct out_of_sync_entry *f, *extra;
- unsigned size, i;
-
- if ( unlikely(d->arch.out_of_sync_free == NULL) )
- {
- FSH_LOG("Allocate more fullshadow tuple blocks.");
-
- size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
- extra = xmalloc_bytes(size);
-
- /* XXX Should be more graceful here. */
- if ( extra == NULL )
- BUG();
-
- memset(extra, 0, size);
-
- /* Record the allocation block so it can be correctly freed later. */
- d->arch.out_of_sync_extras_count++;
- *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
- d->arch.out_of_sync_extras;
- d->arch.out_of_sync_extras = &extra[0];
-
- /* Thread a free chain through the newly-allocated nodes. */
- for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
- extra[i].next = &extra[i+1];
- extra[i].next = NULL;
-
- /* Add the new nodes to the free list. */
- d->arch.out_of_sync_free = &extra[0];
- }
-
- /* Allocate a new node from the quicklist. */
- f = d->arch.out_of_sync_free;
- d->arch.out_of_sync_free = f->next;
-
- return f;
-}
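The matching release path is not shown here; it belongs to free_out_of_sync_state(), called near the end of sync_all(). As a sketch of the idea only, returning a node to the per-domain free list is just a push onto d->arch.out_of_sync_free:

/* Sketch only -- the real cleanup is done by free_out_of_sync_state(). */
static inline void example_release_oos_entry(struct domain *d,
                                             struct out_of_sync_entry *entry)
{
    entry->next = d->arch.out_of_sync_free;
    d->arch.out_of_sync_free = entry;
}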
-
-static inline unsigned long
-shadow_make_snapshot(
- struct domain *d, unsigned long gpfn, unsigned long gmfn)
-{
- unsigned long smfn, sl1mfn = 0;
- void *original, *snapshot;
- u32 min_max = 0;
- int min, max, length;
-
- if ( test_and_set_bit(_PGC_out_of_sync, &mfn_to_page(gmfn)->count_info) )
- {
- ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
- return SHADOW_SNAPSHOT_ELSEWHERE;
- }
-
- perfc_incrc(shadow_make_snapshot);
-
- if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
- {
- printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
- "Dom%d snapshot_page_count=%d\n",
- gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
- BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
- }
-
- if ( !get_shadow_ref(smfn) )
- BUG();
-
- if ( shadow_mode_refcounts(d) &&
- (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
- min_max = mfn_to_page(sl1mfn)->tlbflush_timestamp;
- mfn_to_page(smfn)->tlbflush_timestamp = min_max;
-
- min = SHADOW_MIN(min_max);
- max = SHADOW_MAX(min_max);
- length = max - min + 1;
- perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
-
- min *= sizeof(guest_l1_pgentry_t);
- length *= sizeof(guest_l1_pgentry_t);
-
- original = map_domain_page(gmfn);
- snapshot = map_domain_page(smfn);
- memcpy(snapshot + min, original + min, length);
- unmap_domain_page(original);
- unmap_domain_page(snapshot);
-
- return smfn;
-}
-
-static struct out_of_sync_entry *
-__mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
- unsigned long mfn)
-{
- struct domain *d = v->domain;
- struct page_info *page = mfn_to_page(mfn);
- struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
-
- ASSERT(shadow_lock_is_acquired(d));
- ASSERT(mfn_valid(mfn));
-
-#ifndef NDEBUG
- {
- u32 type = page->u.inuse.type_info & PGT_type_mask;
- if ( shadow_mode_refcounts(d) )
- {
- ASSERT(type == PGT_writable_page);
- }
- else
- {
- ASSERT(type && (type < PGT_l4_page_table));
- }
- }
-#endif
-
- FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
- gpfn, mfn, page->count_info, page->u.inuse.type_info);
-
- // XXX this will require some more thought... Cross-domain sharing and
- // modification of page tables? Hmm...
- //
- if ( d != page_get_owner(page) )
- BUG();
-
- perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
-
- entry->v = v;
- entry->gpfn = gpfn;
- entry->gmfn = mfn;
- entry->writable_pl1e = -1;
-
-#if 0 // this code has not been updated for 32pae & 64 bit modes
-#if SHADOW_DEBUG
- mark_shadows_as_reflecting_snapshot(d, gpfn);
-#endif
-#endif
-
- // increment guest's ref count to represent the entry in the
- // full shadow out-of-sync list.
- //
- get_page(page, d);
-
- return entry;
-}
-
-static struct out_of_sync_entry *
-mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
- unsigned long mfn)
-{
- struct out_of_sync_entry *entry =
- __mark_mfn_out_of_sync(v, gpfn, mfn);
- struct domain *d = v->domain;
-
- entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
- // Add to the out-of-sync list
- //
- entry->next = d->arch.out_of_sync;
- d->arch.out_of_sync = entry;
-
- return entry;
-
-}
-
-static void shadow_mark_va_out_of_sync(
- struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
-{
- struct out_of_sync_entry *entry =
- __mark_mfn_out_of_sync(v, gpfn, mfn);
- l2_pgentry_t sl2e;
- struct domain *d = v->domain;
-
-#if CONFIG_PAGING_LEVELS >= 3
- {
- l4_pgentry_t sl4e;
- l3_pgentry_t sl3e;
-
- __shadow_get_l4e(v, va, &sl4e);
- if ( !(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
- shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
- }
-
- if (!__shadow_get_l3e(v, va, &sl3e)) {
- BUG();
- }
-
- if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
- shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
- }
- }
-#endif
-
- // We need the address of the shadow PTE that maps @va.
- // It might not exist yet. Make sure it's there.
- //
- __shadow_get_l2e(v, va, &sl2e);
- if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
- {
- // either this L1 isn't shadowed yet, or the shadow isn't linked into
- // the current L2.
- shadow_map_l1_into_current_l2(va);
- __shadow_get_l2e(v, va, &sl2e);
- }
- ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
-
- entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
- // NB: this is stored as a machine address.
- entry->writable_pl1e =
- l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
- ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
- entry->va = va;
-
- // Increment shadow's page count to represent the reference
- // inherent in entry->writable_pl1e
- //
- if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
- BUG();
-
- // Add to the out-of-sync list
- //
- entry->next = d->arch.out_of_sync;
- d->arch.out_of_sync = entry;
-
- FSH_LOG("%s(va=%lx -> writable_pl1e=%lx)",
- __func__, va, entry->writable_pl1e);
-}
-
-/*
- * Returns 1 if the snapshot for @gmfn exists and its @index'th entry matches.
- * Returns 0 otherwise.
- */
-static int snapshot_entry_matches(
- struct domain *d, guest_l1_pgentry_t *guest_pt,
- unsigned long gpfn, unsigned index)
-{
- unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
- guest_l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
- int entries_match;
-
- perfc_incrc(snapshot_entry_matches_calls);
-
- if ( !smfn )
- return 0;
-
- snapshot = map_domain_page(smfn);
-
- if (__copy_from_user(&gpte, &guest_pt[index],
- sizeof(gpte)))
- {
- unmap_domain_page(snapshot);
- return 0;
- }
-
- // This could probably be smarter, but this is sufficient for
- // our current needs.
- //
- entries_match = !guest_l1e_has_changed(gpte, snapshot[index],
- PAGE_FLAG_MASK);
-
- unmap_domain_page(snapshot);
-
-#ifdef PERF_COUNTERS
- if ( entries_match )
- perfc_incrc(snapshot_entry_matches_true);
-#endif
-
- return entries_match;
-}
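Callers combine this with the page's out-of-sync flag; the per-level check used repeatedly by is_out_of_sync() below can be summarised as a small helper (a condensed restatement of the file's own idiom, not a function that exists in it):

/* A mapping is stale iff the frame is flagged out of sync AND the guest
 * entry no longer matches the snapshot taken when it went out of sync. */
static inline int example_entry_is_stale(struct domain *d, unsigned long gmfn,
                                         guest_l1_pgentry_t *guest_pt,
                                         unsigned long gpfn, unsigned index)
{
    return page_out_of_sync(mfn_to_page(gmfn)) &&
           !snapshot_entry_matches(d, guest_pt, gpfn, index);
}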
-
-/*
- * Returns 1 if va's shadow mapping is out-of-sync.
- * Returns 0 otherwise.
- */
-static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */
-{
- struct domain *d = v->domain;
-#if CONFIG_PAGING_LEVELS == 4
- unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)?
- pagetable_get_pfn(v->arch.guest_table) :
- pagetable_get_pfn(v->arch.guest_table_user));
-#else
- unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
-#endif
- unsigned long l2pfn = mfn_to_gmfn(d, l2mfn);
- guest_l2_pgentry_t l2e;
- unsigned long l1pfn, l1mfn;
- guest_l1_pgentry_t *guest_pt;
-
- ASSERT(shadow_lock_is_acquired(d));
- ASSERT(VALID_M2P(l2pfn));
-
- perfc_incrc(shadow_out_of_sync_calls);
-
-#if CONFIG_PAGING_LEVELS >= 3
-
-#define unmap_and_return(x) \
- if ( guest_pt != (guest_l1_pgentry_t *) v->arch.guest_vtable ) \
- unmap_domain_page(guest_pt); \
- return (x);
-
- if (d->arch.ops->guest_paging_levels >= PAGING_L3)
- {
- pgentry_64_t le;
- unsigned long gmfn;
- unsigned long gpfn;
- int i;
- unsigned int base_idx = 0;
- base_idx = get_cr3_idxval(v);
-
- gmfn = l2mfn;
- gpfn = l2pfn;
- guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable;
-
- for ( i = PAGING_L4; i >= PAGING_L3; i-- )
- {
- if (d->arch.ops->guest_paging_levels == PAGING_L3
- && i == PAGING_L4)
- continue; /* skip the top-level for 3-level */
-
- if ( page_out_of_sync(mfn_to_page(gmfn)) &&
- !snapshot_entry_matches(
- d, guest_pt, gpfn, guest_table_offset_64(va, i, base_idx)) )
- {
- unmap_and_return (1);
- }
-
- le = entry_empty();
- __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i);
-
- if ( !(entry_get_flags(le) & _PAGE_PRESENT) )
- {
- unmap_and_return (0);
- }
- gpfn = entry_get_pfn(le);
- gmfn = gmfn_to_mfn(d, gpfn);
- if ( !VALID_MFN(gmfn) )
- {
- unmap_and_return (0);
- }
- if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
- unmap_domain_page(guest_pt);
- guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn);
- }
-
- /* L2 */
- if ( page_out_of_sync(mfn_to_page(gmfn)) &&
- !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) )
- {
- unmap_and_return (1);
- }
-
- if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
- unmap_domain_page(guest_pt);
-
- }
- else
-#undef unmap_and_return
-#endif /* CONFIG_PAGING_LEVELS >= 3 */
- {
- if ( page_out_of_sync(mfn_to_page(l2mfn)) &&
- !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
- l2pfn, guest_l2_table_offset(va)) )
- return 1;
- }
-
- __guest_get_l2e(v, va, &l2e);
- if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) ||
- (guest_l2e_get_flags(l2e) & _PAGE_PSE))
- return 0;
-
- l1pfn = l2e_get_pfn(l2e);
- l1mfn = gmfn_to_mfn(d, l1pfn);
-
- // If the l1 pfn is invalid, it can't be out of sync...
- if ( !VALID_MFN(l1mfn) )
- return 0;
-
- guest_pt = (guest_l1_pgentry_t *) map_domain_page(l1mfn);
-
- if ( page_out_of_sync(mfn_to_page(l1mfn)) &&
- !snapshot_entry_matches(
- d, guest_pt, l1pfn, guest_l1_table_offset(va)) )
- {
- unmap_domain_page(guest_pt);
- return 1;
- }
-
- unmap_domain_page(guest_pt);
- return 0;
-}
-
-static int fix_entry(
- struct domain *d,
- l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
-{
- l1_pgentry_t old = *pt;
- l1_pgentry_t new = old;
-
- l1e_remove_flags(new,_PAGE_RW);
- if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
- BUG();
- (*found)++;
- *pt = new;
- if ( is_l1_shadow )
- shadow_put_page_from_l1e(old, d);
-
- return (*found == max_refs_to_find);
-}
-
-static u32 remove_all_write_access_in_ptpage(
- struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
- unsigned long readonly_gpfn, unsigned long readonly_gmfn,
- u32 max_refs_to_find, unsigned long prediction)
-{
- l1_pgentry_t *pt = map_domain_page(pt_mfn);
- l1_pgentry_t *pt_next = 0, *sl1e_p;
- l1_pgentry_t match;
- unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
- int i;
- u32 found = 0;
- int is_l1_shadow =
- ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
- PGT_l1_shadow);
-#if CONFIG_PAGING_LEVELS >= 3
- is_l1_shadow |=
- ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
- PGT_fl1_shadow);
-#endif
-
- if ( SH_L1_HAS_NEXT_PAGE )
- pt_next = map_domain_page(pt_mfn + 1);
-
- match = l1e_from_pfn(readonly_gmfn, flags);
-
- if ( shadow_mode_external(d) )
- {
- i = (mfn_to_page(readonly_gmfn)->u.inuse.type_info & PGT_va_mask)
- >> PGT_va_shift;
-
- if ( SH_L1_HAS_NEXT_PAGE &&
- i >= L1_PAGETABLE_ENTRIES )
- sl1e_p = &pt_next[i - L1_PAGETABLE_ENTRIES];
- else
- sl1e_p = &pt[i];
-
- if ( (i >= 0 && i < GUEST_L1_PAGETABLE_ENTRIES) &&
- !l1e_has_changed(*sl1e_p, match, flags) &&
- fix_entry(d, sl1e_p, &found, is_l1_shadow, max_refs_to_find) &&
- !prediction )
- goto out;
- }
-
- for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
- {
- if ( SH_L1_HAS_NEXT_PAGE &&
- i >= L1_PAGETABLE_ENTRIES )
- sl1e_p = &pt_next[i - L1_PAGETABLE_ENTRIES];
- else
- sl1e_p = &pt[i];
-
- if ( unlikely(!l1e_has_changed(*sl1e_p, match, flags)) &&
- fix_entry(d, sl1e_p, &found, is_l1_shadow, max_refs_to_find) )
- break;
- }
-
-out:
- unmap_domain_page(pt);
- if ( SH_L1_HAS_NEXT_PAGE )
- unmap_domain_page(pt_next);
-
- return found;
-}
-
-static int remove_all_write_access(
- struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
-{
- int i;
- struct shadow_status *a;
- u32 found = 0, write_refs;
- unsigned long predicted_smfn;
-
- ASSERT(shadow_lock_is_acquired(d));
- ASSERT(VALID_MFN(readonly_gmfn));
-
- perfc_incrc(remove_write_access);
-
- // If it's not a writable page, then no writable refs can be outstanding.
- //
- if ( (mfn_to_page(readonly_gmfn)->u.inuse.type_info & PGT_type_mask) !=
- PGT_writable_page )
- {
- perfc_incrc(remove_write_not_writable);
- return 1;
- }
-
- // How many outstanding writable PTEs for this page are there?
- //
- write_refs =
- (mfn_to_page(readonly_gmfn)->u.inuse.type_info & PGT_count_mask);
- if ( write_refs && MFN_PINNED(readonly_gmfn) )
- {
- write_refs--;
- }
-
- if ( write_refs == 0 )
- {
- perfc_incrc(remove_write_no_work);
- return 1;
- }
-
- if ( shadow_mode_external(d) ) {
- if (--write_refs == 0)
- return 0;
-
- // Use the back pointer to locate the shadow page that can contain
- // the PTE of interest
- if ( (predicted_smfn = mfn_to_page(readonly_gmfn)->tlbflush_timestamp) ) {
- found += remove_all_write_access_in_ptpage(
- d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
- if ( found == write_refs )
- return 0;
- }
- }
-
- // Search all the shadow L1 page tables...
- //
- for (i = 0; i < shadow_ht_buckets; i++)
- {
- a = &d->arch.shadow_ht[i];
- while ( a && a->gpfn_and_flags )
- {
- if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow
-#if CONFIG_PAGING_LEVELS >= 3
- || (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
-#endif
- )
-
- {
- found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
- if ( found == write_refs )
- return 0;
- }
-
- a = a->next;
- }
- }
-
- FSH_LOG("%s: looking for %d refs, found %d refs",
- __func__, write_refs, found);
-
- return 0;
-}
-
-static void resync_pae_guest_l3(struct domain *d)
-{
- struct out_of_sync_entry *entry;
- unsigned long i, idx;
- unsigned long smfn, gmfn;
- pgentry_64_t *guest, *shadow_l3, *snapshot;
- struct vcpu *v = current;
- int max = -1;
- int unshadow = 0;
-
-
- ASSERT( shadow_mode_external(d) );
-
- gmfn = pagetable_get_pfn(v->arch.guest_table);
-
- for ( entry = d->arch.out_of_sync; entry; entry = entry->next )
- {
- if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
- continue;
- if ( entry->gmfn != gmfn )
- continue;
-
- idx = get_cr3_idxval(v);
-
- smfn = __shadow_status(d, entry->gpfn, PGT_l4_shadow);
-
- if ( !smfn )
- continue;
-
- guest = (pgentry_64_t *)map_domain_page(entry->gmfn);
- snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn);
- shadow_l3 = (pgentry_64_t *)map_domain_page(smfn);
-
- for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
- {
- int index = i + idx * PAE_L3_PAGETABLE_ENTRIES;
- if ( entry_has_changed(
- guest[index], snapshot[index], PAGE_FLAG_MASK) )
- {
- unsigned long gpfn;
-
- /*
- * Looks like it's no longer a page table.
- */
- if ( unlikely(entry_get_value(guest[index]) & PAE_PDPT_RESERVED) )
- {
- if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(shadow_l3[i]));
-
- shadow_l3[i] = entry_empty();
- continue;
- }
-
- gpfn = entry_get_pfn(guest[index]);
-
- if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
- {
- if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(shadow_l3[i]));
-
- shadow_l3[i] = entry_empty();
- continue;
- }
-
- validate_entry_change(d, &guest[index],
- &shadow_l3[i], PAGING_L3);
- }
-
- if ( entry_get_value(guest[index]) != 0 )
- max = i;
-
- if ( !(entry_get_flags(guest[index]) & _PAGE_PRESENT) &&
- unlikely(entry_get_value(guest[index]) != 0) &&
- !unshadow &&
- (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
- unshadow = 1;
-
- }
- if ( max == -1 )
- unshadow = 1;
-
- unmap_domain_page(guest);
- unmap_domain_page(snapshot);
- unmap_domain_page(shadow_l3);
-
- if ( unlikely(unshadow) )
- shadow_unpin(smfn);
- break;
- }
-}
-
-static int resync_all(struct domain *d, u32 stype)
-{
- struct out_of_sync_entry *entry;
- unsigned i;
- unsigned long smfn;
- void *guest, *shadow, *snapshot;
- int need_flush = 0, external = shadow_mode_external(d);
- int unshadow;
- int changed;
- u32 min_max_shadow, min_max_snapshot;
- int min_shadow, max_shadow, min_snapshot, max_snapshot;
- struct vcpu *v;
-
- ASSERT(shadow_lock_is_acquired(d));
-
- for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
- {
- int max = -1;
-
- if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
- continue;
-
- smfn = __shadow_status(d, entry->gpfn, stype);
-
- if ( !smfn )
- {
- // For heavy weight shadows: no need to update refcounts if
- // there's no shadow page.
- //
- if ( shadow_mode_refcounts(d) )
- continue;
-
- // For light weight shadows: we only need to resync the refcounts to
- // the new contents of the guest page if it has the right
- // page type.
- //
- if ( stype != ( mfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
- continue;
- }
-
- FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
- stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
-
- // Compare guest's new contents to its snapshot, validating
- // and updating its shadow as appropriate.
- //
- guest = map_domain_page(entry->gmfn);
- snapshot = map_domain_page(entry->snapshot_mfn);
-
- if ( smfn )
- shadow = map_domain_page(smfn);
- else
- shadow = NULL;
-
- unshadow = 0;
-
- min_max_shadow = mfn_to_page(smfn)->tlbflush_timestamp;
- min_shadow = SHADOW_MIN(min_max_shadow);
- max_shadow = SHADOW_MAX(min_max_shadow);
-
- min_max_snapshot= mfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
- min_snapshot = SHADOW_MIN(min_max_snapshot);
- max_snapshot = SHADOW_MAX(min_max_snapshot);
-
- switch ( stype )
- {
- case PGT_l1_shadow:
- {
- guest_l1_pgentry_t *guest1 = guest;
- l1_pgentry_t *shadow1 = shadow;
- l1_pgentry_t *shadow1_next = 0, *sl1e_p;
- guest_l1_pgentry_t *snapshot1 = snapshot;
- int unshadow_l1 = 0;
-
- ASSERT(shadow_mode_write_l1(d) ||
- shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
-
- if ( !shadow_mode_refcounts(d) )
- revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
- if ( !smfn )
- break;
-
- changed = 0;
-
- if ( SH_L1_HAS_NEXT_PAGE && shadow1 )
- shadow1_next = map_domain_page(smfn + 1);
-
- for ( i = min_shadow; i <= max_shadow; i++ )
- {
-
- if ( SH_L1_HAS_NEXT_PAGE && i >= L1_PAGETABLE_ENTRIES )
- sl1e_p = &shadow1_next[i - L1_PAGETABLE_ENTRIES];
- else
- sl1e_p = &shadow1[i];
-
- if ( (i < min_snapshot) || (i > max_snapshot) ||
- guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
- {
- int error;
-
-#if CONFIG_PAGING_LEVELS >= 3
- unsigned long gpfn;
-
- gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
-
- if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
- {
- guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty();
- validate_pte_change(d, tmp_gl1e, sl1e_p);
- unshadow_l1 = 1;
- continue;
- }
-#endif
-
- error = validate_pte_change(d, guest1[i], sl1e_p);
- if ( error == -1 )
- unshadow_l1 = 1;
- else {
- need_flush |= error;
- if ( l1e_get_flags(*sl1e_p) & _PAGE_PRESENT )
- set_guest_back_ptr(d, *sl1e_p, smfn, i);
- }
- // can't update snapshots of linear page tables -- they
- // are used multiple times...
- //
- // snapshot[i] = new_pte;
-
- changed++;
- }
- }
-
- if ( shadow1_next )
- unmap_domain_page(shadow1_next);
-
- perfc_incrc(resync_l1);
- perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
- perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
-
- if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
- unshadow_l1 ) {
- pgentry_64_t l2e = { 0 };
-
- __shadow_get_l2e(entry->v, entry->va, &l2e);
-
- if ( entry_get_flags(l2e) & _PAGE_PRESENT ) {
- put_shadow_ref(entry_get_pfn(l2e));
- l2e = entry_empty();
- __shadow_set_l2e(entry->v, entry->va, &l2e);
-
- if (entry->v == current)
- need_flush = 1;
- }
- }
-
- break;
- }
-#if CONFIG_PAGING_LEVELS == 2
- case PGT_l2_shadow:
- {
- l2_pgentry_t *guest2 = guest;
- l2_pgentry_t *shadow2 = shadow;
- l2_pgentry_t *snapshot2 = snapshot;
-
- ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
- BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
-
- changed = 0;
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- {
- if ( !is_guest_l2_slot(0,i) && !external )
- continue;
-
- l2_pgentry_t new_pde = guest2[i];
- if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
- {
- need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
-
- // can't update snapshots of linear page tables -- they
- // are used multiple times...
- //
- // snapshot[i] = new_pde;
-
- changed++;
- }
- if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
- max = i;
-
- // XXX - This hack works for linux guests.
- // Need a better solution long term.
- if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
- unlikely(l2e_get_intpte(new_pde) != 0) &&
- !unshadow && MFN_PINNED(smfn) )
- unshadow = 1;
- }
- if ( max == -1 )
- unshadow = 1;
- perfc_incrc(resync_l2);
- perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
- break;
- }
- case PGT_hl2_shadow:
- {
- l2_pgentry_t *guest2 = guest;
- l2_pgentry_t *snapshot2 = snapshot;
- l1_pgentry_t *shadow2 = shadow;
-
- ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
- BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
-
- changed = 0;
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- {
- if ( !is_guest_l2_slot(0, i) && !external )
- continue;
-
- l2_pgentry_t new_pde = guest2[i];
- if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
- {
- need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
-
- // can't update snapshots of linear page tables -- they
- // are used multiple times...
- //
- // snapshot[i] = new_pde;
-
- changed++;
- }
- }
- perfc_incrc(resync_hl2);
- perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
- break;
- }
-#elif CONFIG_PAGING_LEVELS >= 3
- case PGT_l2_shadow:
- case PGT_l3_shadow:
- {
- pgentry_64_t *guest_pt = guest;
- pgentry_64_t *shadow_pt = shadow;
- pgentry_64_t *snapshot_pt = snapshot;
-
- changed = 0;
- for ( i = min_shadow; i <= max_shadow; i++ )
- {
- if ( (i < min_snapshot) || (i > max_snapshot) ||
- entry_has_changed(
- guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
- {
- unsigned long gpfn;
-
- gpfn = entry_get_pfn(guest_pt[i]);
- /*
- * Looks like it's no longer a page table.
- */
- if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
- {
- if ( entry_get_flags(shadow_pt[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(shadow_pt[i]));
- shadow_pt[i] = entry_empty();
- continue;
- }
-
- need_flush |= validate_entry_change(
- d, &guest_pt[i], &shadow_pt[i],
- shadow_type_to_level(stype));
- changed++;
- }
-#if CONFIG_PAGING_LEVELS == 3
- if ( stype == PGT_l3_shadow )
- {
- if ( entry_get_value(guest_pt[i]) != 0 )
- max = i;
-
- if ( !(entry_get_flags(guest_pt[i]) & _PAGE_PRESENT) &&
- unlikely(entry_get_value(guest_pt[i]) != 0) &&
- !unshadow &&
- (mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) )
- unshadow = 1;
- }
-#endif
- }
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L3
- && max == -1 && stype == PGT_l3_shadow )
- unshadow = 1;
-
- perfc_incrc(resync_l3);
- perfc_incr_histo(shm_l3_updates, changed, PT_UPDATES);
- break;
- }
- case PGT_l4_shadow:
- {
- guest_root_pgentry_t *guest_root = guest;
- guest_root_pgentry_t *snapshot_root = snapshot;
-
- changed = 0;
- for ( i = 0; i < GUEST_ROOT_PAGETABLE_ENTRIES; i++ )
- {
- guest_root_pgentry_t new_root_e = guest_root[i];
- if ( !is_guest_l4_slot(i) && !external )
- continue;
- if ( root_entry_has_changed(
- new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
- {
-#ifndef GUEST_PGENTRY_32
- l4_pgentry_t *shadow4 = shadow;
- unsigned long gpfn;
-
- gpfn = l4e_get_pfn(new_root_e);
-
- /*
- * Looks like it's no longer a page table.
- */
- if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
- {
- if ( l4e_get_flags(shadow4[i]) & _PAGE_PRESENT )
- put_shadow_ref(l4e_get_pfn(shadow4[i]));
- shadow4[i] = l4e_empty();
- continue;
- }
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L4 )
- {
- need_flush |= validate_entry_change(
- d, (pgentry_64_t *)&new_root_e,
- (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
- }
- else
-#endif
- {
- validate_bl2e_change(d, &new_root_e, shadow, i);
- }
- changed++;
- ESH_LOG("%d: shadow4 mfn: %lx, shadow root: %lx\n", i,
- smfn, pagetable_get_paddr(current->arch.shadow_table));
- }
- if ( guest_root_get_intpte(new_root_e) != 0 ) /* FIXME: check flags? */
- max = i;
-
- // Need a better solution in the long term.
- if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) &&
- unlikely(guest_root_get_intpte(new_root_e) != 0) &&
- !unshadow &&
- (mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) )
- unshadow = 1;
- }
- if ( max == -1 )
- unshadow = 1;
- perfc_incrc(resync_l4);
- perfc_incr_histo(shm_l4_updates, changed, PT_UPDATES);
- break;
- }
-
-#endif /* CONFIG_PAGING_LEVELS >= 3 */
- default:
- BUG();
- }
-
- if ( smfn )
- unmap_domain_page(shadow);
- unmap_domain_page(snapshot);
- unmap_domain_page(guest);
-
- if ( unlikely(unshadow && stype == PGT_root_page_table) )
- {
- for_each_vcpu(d, v)
- if(smfn == pagetable_get_pfn(v->arch.shadow_table))
- return need_flush;
- perfc_incrc(unshadow_l2_count);
- shadow_unpin(smfn);
-#if CONFIG_PAGING_LEVELS == 2
- if ( unlikely(shadow_mode_external(d)) )
- {
- unsigned long hl2mfn;
-
- if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
- MFN_PINNED(hl2mfn) )
- shadow_unpin(hl2mfn);
- }
-#endif
- }
- }
-
- return need_flush;
-}
-
-#if CONFIG_PAGING_LEVELS == 2
-static int resync_all_levels_guest_page(struct domain *d)
-{
- int need_flush = 0;
-
- need_flush |= resync_all(d, PGT_l1_shadow);
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
- shadow_mode_translate(d) )
- {
- need_flush |= resync_all(d, PGT_hl2_shadow);
- }
- return need_flush;
-}
-#elif CONFIG_PAGING_LEVELS == 3
-static int resync_all_levels_guest_page(struct domain *d)
-{
- int need_flush = 0;
-
- need_flush |= resync_all(d, PGT_l1_shadow);
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- need_flush |= resync_all(d, PGT_l4_shadow);
- else
- {
- need_flush |= resync_all(d, PGT_l2_shadow);
- if ( shadow_mode_log_dirty(d) )
- {
- need_flush |= resync_all(d, PGT_l3_shadow);
- need_flush |= resync_all(d, PGT_l4_shadow);
- }
- else
- resync_pae_guest_l3(d);
- }
-
- return need_flush;
-}
-#elif CONFIG_PAGING_LEVELS == 4
-static int resync_all_levels_guest_page(struct domain *d)
-{
- int need_flush = 0;
-
- need_flush |= resync_all(d, PGT_l1_shadow);
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- need_flush |= resync_all(d, PGT_l4_shadow);
- else
- {
- need_flush |= resync_all(d, PGT_l2_shadow);
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- resync_pae_guest_l3(d);
- else
- {
- need_flush |= resync_all(d, PGT_l3_shadow);
- need_flush |= resync_all(d, PGT_l4_shadow);
- }
- }
- return need_flush;
-}
-#endif
-
-static void sync_all(struct domain *d)
-{
- struct out_of_sync_entry *entry;
- int need_flush = 0;
- l1_pgentry_t *ppte, opte, npte;
- cpumask_t other_vcpus_mask;
-
- perfc_incrc(shadow_sync_all);
-
- ASSERT(shadow_lock_is_acquired(d));
-
- // First, remove all write permissions to the page tables
- //
- for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
- {
- // Skip entries that have low bits set... Those aren't
- // real PTEs.
- //
- if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
- continue;
-
- ppte = (l1_pgentry_t *)(
- (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
- (entry->writable_pl1e & ~PAGE_MASK));
- opte = npte = *ppte;
- l1e_remove_flags(npte, _PAGE_RW);
-
- if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(npte, d) )
- BUG();
- *ppte = npte;
- set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
- (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
- shadow_put_page_from_l1e(opte, d);
-
- unmap_domain_page(ppte);
- }
-
- /* Other VCPUs mustn't use the revoked writable mappings. */
- other_vcpus_mask = d->domain_dirty_cpumask;
- cpu_clear(smp_processor_id(), other_vcpus_mask);
- flush_tlb_mask(other_vcpus_mask);
-
- /* Flush ourself later. */
- need_flush = 1;
-
- need_flush |= resync_all_levels_guest_page(d);
-
- if ( need_flush && !unlikely(shadow_mode_external(d)) )
- local_flush_tlb();
-
- free_out_of_sync_state(d);
-}
-
-static inline int l1pte_write_fault(
- struct vcpu *v, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
- unsigned long va)
-{
- struct domain *d = v->domain;
- guest_l1_pgentry_t gpte = *gpte_p;
- l1_pgentry_t spte;
- unsigned long gpfn = l1e_get_pfn(gpte);
- unsigned long gmfn = gmfn_to_mfn(d, gpfn);
-
- //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
-
- if ( unlikely(!VALID_MFN(gmfn)) )
- {
- SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
- *spte_p = l1e_empty();
- return 0;
- }
-
- ASSERT(guest_l1e_get_flags(gpte) & _PAGE_RW);
- guest_l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
- spte = l1e_from_pfn(gmfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
-
- SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
- l1e_get_intpte(spte), l1e_get_intpte(gpte));
-
- __mark_dirty(d, gmfn);
-
- if ( mfn_is_page_table(gmfn) )
- shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
-
- *gpte_p = gpte;
- *spte_p = spte;
-
- return 1;
-}
-
-static inline int l1pte_read_fault(
- struct domain *d, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
-{
- guest_l1_pgentry_t gpte = *gpte_p;
- l1_pgentry_t spte = *spte_p;
- unsigned long pfn = l1e_get_pfn(gpte);
- unsigned long mfn = gmfn_to_mfn(d, pfn);
-
- if ( unlikely(!VALID_MFN(mfn)) )
- {
- SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
- *spte_p = l1e_empty();
- return 0;
- }
-
- guest_l1e_add_flags(gpte, _PAGE_ACCESSED);
- spte = l1e_from_pfn(mfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
-
- if ( shadow_mode_log_dirty(d) || !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
- mfn_is_page_table(mfn) )
- {
- l1e_remove_flags(spte, _PAGE_RW);
- }
-
- SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
- l1e_get_intpte(spte), l1e_get_intpte(gpte));
- *gpte_p = gpte;
- *spte_p = spte;
-
- return 1;
-}
-#if CONFIG_PAGING_LEVELS == 2
-static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
-{
- l1_pgentry_t gpte, spte, orig_gpte;
- struct vcpu *v = current;
- struct domain *d = v->domain;
- l2_pgentry_t gpde;
-
- spte = l1e_empty();
-
- SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
- va, (unsigned long)regs->error_code);
- perfc_incrc(shadow_fault_calls);
-
- check_pagetable(v, "pre-sf");
-
- /*
- * Don't let someone else take the guest's table pages out-of-sync.
- */
- shadow_lock(d);
-
- /*
- * STEP 1. Resynchronise any out-of-sync page table entries that map
- * this address before deciding whether the fault must be
- * passed on to the guest.
- */
- __shadow_sync_va(v, va);
-
- /*
- * STEP 2. Check the guest PTE.
- */
- __guest_get_l2e(v, va, &gpde);
- if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
- {
- SH_VVLOG("shadow_fault - EXIT: L1 not present");
- perfc_incrc(shadow_fault_bail_pde_not_present);
- goto fail;
- }
-
- // This can't fault because we hold the shadow lock and we've ensured that
- // the mapping is in-sync, so the check of the PDE's present bit, above,
- // covers this access.
- //
- //orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
- __guest_get_l1e(v, va, &gpte);
- orig_gpte = gpte;
-
- if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
- {
- SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
- l1e_get_intpte(gpte));
- perfc_incrc(shadow_fault_bail_pte_not_present);
- goto fail;
- }
-
- /* Write fault? */
- if ( regs->error_code & 2 )
- {
- int allow_writes = 0;
-
- if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
- {
- if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
- {
- allow_writes = 1;
- l1e_add_flags(gpte, _PAGE_RW);
- }
- else
- {
- /* Write fault on a read-only mapping. */
- SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
- l1e_get_intpte(gpte));
- perfc_incrc(shadow_fault_bail_ro_mapping);
- goto fail;
- }
- }
- else if ( unlikely(!shadow_mode_wr_pt_pte(d) && mfn_is_page_table(l1e_get_pfn(gpte))) )
- {
- SH_LOG("l1pte_write_fault: no write access to page table page");
- domain_crash_synchronous();
- }
-
- if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
- {
- SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
- perfc_incrc(write_fault_bail);
- shadow_unlock(d);
- return 0;
- }
-
- if ( allow_writes )
- l1e_remove_flags(gpte, _PAGE_RW);
- }
- else
- {
- if ( !l1pte_read_fault(d, &gpte, &spte) )
- {
- SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
- perfc_incrc(read_fault_bail);
- shadow_unlock(d);
- return 0;
- }
- }
-
- /*
- * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
- */
- if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
- {
- /* XXX Watch out for read-only L2 entries! (not used in Linux). */
- /*if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
- &gpte, sizeof(gpte))) )*/
- if ( unlikely(!__guest_set_l1e(v, va, &gpte)))
- {
- printk("%s() failed, crashing domain %d "
- "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
- __func__, d->domain_id, l2e_get_intpte(gpde), va);
- domain_crash_synchronous();
- }
-
- __mark_dirty(d, gmfn_to_mfn(d, l2e_get_pfn(gpde)));
- }
-
- shadow_set_l1e(va, spte, 1);
-
- perfc_incrc(shadow_fault_fixed);
- d->arch.shadow_fault_count++;
-
- shadow_unlock(d);
-
- check_pagetable(v, "post-sf");
- return EXCRET_fault_fixed;
-
-fail:
- shadow_unlock(d);
- return 0;
-}
-#endif /* CONFIG_PAGING_LEVELS == 2 */
-
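-/*
- * Return the machine frame of the guest L1 table that maps va, or
- * INVALID_MFN if the guest L2 entry for va is not present.
- */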
-static inline unsigned long va_to_l1mfn(struct vcpu *v, unsigned long va)
-{
- struct domain *d = v->domain;
- guest_l2_pgentry_t gl2e = {0};
-
- __guest_get_l2e(v, va, &gl2e);
-
- if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT)) )
- return INVALID_MFN;
-
- return gmfn_to_mfn(d, l2e_get_pfn(gl2e));
-}
-
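-/*
- * Shadow-side handler for a guest update_va_mapping: bring the tables that
- * map va back in sync, propagate the new guest PTE into the shadow L1, and
- * record the page-table page as dirty for log-dirty mode.
- */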
-static int do_update_va_mapping(unsigned long va,
- l1_pgentry_t val,
- struct vcpu *v)
-{
- struct domain *d = v->domain;
- l1_pgentry_t spte;
- int rc = 0;
-
- shadow_lock(d);
-
- // This is actually overkill - we don't need to sync the L1 itself,
- // just everything involved in getting to this L1 (i.e. we need
- // linear_pg_table[l1_linear_offset(va)] to be in sync)...
- //
- __shadow_sync_va(v, va);
-
- l1pte_propagate_from_guest(d, *(guest_l1_pgentry_t *)&val, &spte);
-#if CONFIG_PAGING_LEVELS == 2
- shadow_set_l1e(va, spte, 0);
-#elif CONFIG_PAGING_LEVELS >= 3
- shadow_set_l1e_64(va, (pgentry_64_t *) &spte, 0);
-#endif
- /*
- * If we're in log-dirty mode then we need to note that we've updated
- * the PTE in the PT-holding page. We need the machine frame number
- * for this.
- */
- __mark_dirty(d, va_to_l1mfn(v, va));
-
- shadow_unlock(d);
-
- return rc;
-}
-
-
-/*
- * What lives where in the 32-bit address space in the various shadow modes,
- * and what it uses to get/maintain that mapping.
- *
- * SHADOW MODE: none enable translate external
- *
- * 4KB things:
- * guest_vtable lin_l2 mapped per gl2 lin_l2 via hl2 mapped per gl2
- * shadow_vtable n/a sh_lin_l2 sh_lin_l2 mapped per gl2
- * hl2_vtable n/a n/a lin_hl2 via hl2 mapped per gl2
- * monitor_vtable n/a n/a n/a mapped once
- *
- * 4MB things:
- * guest_linear lin via gl2 lin via gl2 lin via hl2 lin via hl2
- * shadow_linear n/a sh_lin via sl2 sh_lin via sl2 sh_lin via sl2
- * monitor_linear n/a n/a n/a ???
- * perdomain perdomain perdomain perdomain perdomain
- * R/O M2P R/O M2P R/O M2P n/a n/a
- * R/W M2P R/W M2P R/W M2P R/W M2P R/W M2P
- * P2M n/a n/a R/O M2P R/O M2P
- *
- * NB:
- * update_pagetables(), shadow_update_pagetables(), shadow_mode_enable(),
- * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
- * all play a part in maintaining these mappings.
- */
-static void shadow_update_pagetables(struct vcpu *v)
-{
- struct domain *d = v->domain;
-#if CONFIG_PAGING_LEVELS == 4
- unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)?
- pagetable_get_pfn(v->arch.guest_table) :
- pagetable_get_pfn(v->arch.guest_table_user));
-#else
- unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
-#endif
-
- unsigned long gpfn = mfn_to_gmfn(d, gmfn);
- unsigned long smfn, old_smfn;
-
-#if CONFIG_PAGING_LEVELS == 2
- unsigned long hl2mfn;
-#endif
- int need_sync = 0;
-
- int max_mode = ( shadow_mode_external(d) ? SHM_external
- : shadow_mode_translate(d) ? SHM_translate
- : shadow_mode_enabled(d) ? SHM_enable
- : 0 );
-
- ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
- ASSERT( max_mode );
-
- /*
- * arch.guest_vtable
- */
- if ( max_mode & (SHM_enable | SHM_external) )
- {
- if ( likely(v->arch.guest_vtable != NULL) )
- unmap_domain_page_global(v->arch.guest_vtable);
- v->arch.guest_vtable = map_domain_page_global(gmfn);
- }
-
- /*
- * arch.shadow_table
- */
-#if CONFIG_PAGING_LEVELS == 3 && defined (GUEST_PGENTRY_32)
- /*
- * We use PGT_l4_shadow for 2-level paging guests on PAE
- */
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
- smfn = shadow_l3_table(v, gpfn, gmfn);
- }
- else
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3 && defined ( GUEST_32PAE )
- /*
- * We also use PGT_l4_shadow for the top-level table of 3-level PAE guests
- */
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
- smfn = shadow_l3_table(v, gpfn, gmfn);
- else
- {
- update_top_level_shadow(v, smfn);
- need_sync = 1;
- }
- }
- else
-#endif
- if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
- {
-#if CONFIG_PAGING_LEVELS == 2
- smfn = shadow_l2_table(v, gpfn, gmfn);
-#elif CONFIG_PAGING_LEVELS == 3
- smfn = shadow_l3_table(v, gpfn, gmfn);
-#elif CONFIG_PAGING_LEVELS == 4
- smfn = shadow_l4_table(v, gpfn, gmfn);
-#endif
- }
- else
- {
-#if CONFIG_PAGING_LEVELS >= 3
- if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
- update_top_level_shadow(v, smfn);
-#endif
- /*
- * Defer the sync until later, to avoid this smfn occasionally
- * being unshadowed in the meantime.
- */
- need_sync = 1;
- }
-
-
- if ( !get_shadow_ref(smfn) )
- BUG();
- old_smfn = pagetable_get_pfn(v->arch.shadow_table);
- v->arch.shadow_table = pagetable_from_pfn(smfn);
- if ( old_smfn )
- put_shadow_ref(old_smfn);
-
- SH_VVLOG("shadow_update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
-
- /*
- * arch.shadow_vtable
- */
- if ( max_mode == SHM_external
-#if CONFIG_PAGING_LEVELS >=3
- || max_mode & SHM_enable
-#endif
- )
- {
- if ( v->arch.shadow_vtable )
- unmap_domain_page_global(v->arch.shadow_vtable);
- v->arch.shadow_vtable = map_domain_page_global(smfn);
- }
-
-#if CONFIG_PAGING_LEVELS == 2
- /*
- * arch.hl2_vtable
- */
-
- // if max_mode == SHM_translate, then the hl2 is already installed
- // correctly in its smfn, and there's nothing to do.
- //
- if ( max_mode == SHM_external )
- {
- if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
- hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
- if ( v->arch.hl2_vtable )
- unmap_domain_page_global(v->arch.hl2_vtable);
- v->arch.hl2_vtable = map_domain_page_global(hl2mfn);
- }
-
- /*
- * fixup pointers in monitor table, as necessary
- */
- if ( max_mode == SHM_external )
- {
- l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
- l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
- l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
-
- ASSERT( shadow_mode_translate(d) );
-
- if ( !get_shadow_ref(hl2mfn) )
- BUG();
- mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
- if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
- put_shadow_ref(l2e_get_pfn(old_hl2e));
-
- if ( !get_shadow_ref(smfn) )
- BUG();
- mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
- if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
- put_shadow_ref(l2e_get_pfn(old_sl2e));
-
- // XXX - maybe this can be optimized somewhat??
- local_flush_tlb();
- }
-#endif /* CONFIG_PAGING_LEVELS == 2 */
-
-#if CONFIG_PAGING_LEVELS == 3
- /*
- * fixup pointers in monitor table, as necessary
- */
- if ( max_mode == SHM_external )
- {
- l3_pgentry_t *mpl3e = (l3_pgentry_t *) v->arch.monitor_vtable;
- l2_pgentry_t *spl2e;
- unsigned long s2mfn;
- int i;
-
- ASSERT( shadow_mode_translate(d) );
- s2mfn = l3e_get_pfn(mpl3e[L3_PAGETABLE_ENTRIES - 1]);
-
- ASSERT( s2mfn );
- spl2e = map_domain_page(s2mfn);
-
- for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
- (l3e_get_flags(mpl3e[i]) & _PAGE_PRESENT) ?
- l2e_from_pfn(l3e_get_pfn(mpl3e[i]), __PAGE_HYPERVISOR) :
- l2e_empty();
-
- unmap_domain_page(spl2e);
- local_flush_tlb();
- }
-#endif
-
- if ( likely(need_sync) )
- shadow_sync_all(d);
-}
-
-
-/************************************************************************/
-/************************************************************************/
-/************************************************************************/
-
-#if 0 // this code has not been updated for 32pae & 64 bit modes
-#if SHADOW_DEBUG
-
-// The following is entirely for _check_pagetable()'s benefit.
-// _check_pagetable() wants to know whether a given entry in a
-// shadow page table is supposed to be the shadow of the guest's
-// current entry, or the shadow of the entry held in the snapshot
-// taken above.
-//
-// Here, we mark all currently existing entries as reflecting
-// the snapshot, above. All other places in xen that update
-// the shadow will keep the shadow in sync with the guest's
-// entries (via l1pte_propagate_from_guest and friends), which clear
-// the SHADOW_REFLECTS_SNAPSHOT bit.
-//
-static void
-mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
-{
- unsigned long smfn;
- l1_pgentry_t *l1e;
- l2_pgentry_t *l2e;
- unsigned i;
-
- if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
- {
- l1e = map_domain_page(smfn);
- for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l1_slot(i) &&
- (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
- l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
- unmap_domain_page(l1e);
- }
-
- if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
- {
- l2e = map_domain_page(smfn);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(0, i) &&
- (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
- l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
- unmap_domain_page(l2e);
- }
-}
-
-// BUG: these are not SMP safe...
-static int sh_l2_present;
-static int sh_l1_present;
-static char *sh_check_name;
-// int shadow_status_noswap; // declared in shadow32.c
-
-#define v2m(_v, _adr) ({ \
- unsigned long _a = (unsigned long)(_adr); \
- l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
- unsigned long _pa = -1; \
- if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
- { \
- l1_pgentry_t _pte; \
- _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
- if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
- _pa = l1e_get_paddr(_pte); \
- } \
- _pa | (_a & ~PAGE_MASK); \
-})
-
-#define FAIL(_f, _a...) \
- do { \
- printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
- sh_check_name, level, l2_idx, l1_idx, ## _a, \
- __FILE__, __LINE__); \
- printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
- " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
- " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
- " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
- l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
- l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
- p_guest_pte, p_shadow_pte, p_snapshot_pte, \
- (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
- (void *)v2m(v, p_snapshot_pte), \
- (l2_idx << L2_PAGETABLE_SHIFT) | \
- (l1_idx << L1_PAGETABLE_SHIFT)); \
- errors++; \
- } while ( 0 )
-
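-/*
- * Audit a single shadow PTE against the guest PTE it is supposed to
- * reflect (or against the snapshot, if the shadow entry carries
- * SHADOW_REFLECTS_SNAPSHOT), counting each coherence violation as an
- * error.
- */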
-static int check_pte(
- struct vcpu *v,
- l1_pgentry_t *p_guest_pte,
- l1_pgentry_t *p_shadow_pte,
- l1_pgentry_t *p_snapshot_pte,
- int level, int l2_idx, int l1_idx)
-{
- struct domain *d = v->domain;
- l1_pgentry_t guest_pte = *p_guest_pte;
- l1_pgentry_t shadow_pte = *p_shadow_pte;
- l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
- l1_pgentry_t eff_guest_pte;
- unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
- int errors = 0, guest_writable;
- int page_table_page;
-
- if ( (l1e_get_intpte(shadow_pte) == 0) ||
- (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
- (l1e_get_intpte(shadow_pte) == 0x00000E00) )
- return errors; /* always safe */
-
- if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
- FAIL("Non zero not present shadow_pte");
-
- if ( level == 2 ) sh_l2_present++;
- if ( level == 1 ) sh_l1_present++;
-
- if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
- eff_guest_pte = snapshot_pte;
- else
- eff_guest_pte = guest_pte;
-
- if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
- FAIL("Guest not present yet shadow is");
-
- mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
-
- if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
- FAIL("Corrupt?");
-
- if ( (level == 1) &&
- (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
- !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
- FAIL("Dirty coherence");
-
- if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
- !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
- FAIL("Accessed coherence");
-
- if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
- FAIL("global bit set in shadow");
-
- eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
- eff_guest_mfn = gmfn_to_mfn(d, eff_guest_pfn);
- shadow_mfn = l1e_get_pfn(shadow_pte);
-
- if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
- FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
- __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
-
- page_table_page = mfn_is_page_table(eff_guest_mfn);
-
- guest_writable =
- (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
- (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
-
- if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
- {
- printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08lx page_table_page=%d\n",
- eff_guest_pfn, eff_guest_mfn, shadow_mfn,
- mfn_to_page(eff_guest_mfn)->u.inuse.type_info,
- page_table_page);
- FAIL("RW coherence");
- }
-
- if ( (level == 1) &&
- (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
- !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
- {
- printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08lx page_table_page=%d\n",
- eff_guest_pfn, eff_guest_mfn, shadow_mfn,
- mfn_to_page(eff_guest_mfn)->u.inuse.type_info,
- page_table_page);
- FAIL("RW2 coherence");
- }
-
- if ( eff_guest_mfn == shadow_mfn )
- {
- if ( level > 1 )
- FAIL("Linear map ???"); /* XXX this will fail on BSD */
- }
- else
- {
- if ( level < 2 )
- FAIL("Shadow in L1 entry?");
-
- if ( level == 2 )
- {
- if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
- FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
- __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
- }
- else
- BUG(); // XXX -- not handled yet.
- }
-
- return errors;
-}
-#undef FAIL
-#undef v2m
-
-static int check_l1_table(
- struct vcpu *v, unsigned long gpfn,
- unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
-{
- struct domain *d = v->domain;
- int i;
- unsigned long snapshot_mfn;
- l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
- int errors = 0;
-
- if ( page_out_of_sync(mfn_to_page(gmfn)) )
- {
- snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
- ASSERT(snapshot_mfn);
- p_snapshot = map_domain_page(snapshot_mfn);
- }
-
- p_guest = map_domain_page(gmfn);
- p_shadow = map_domain_page(smfn);
-
- for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
- errors += check_pte(v, p_guest+i, p_shadow+i,
- p_snapshot ? p_snapshot+i : NULL,
- 1, l2_idx, i);
-
- unmap_domain_page(p_shadow);
- unmap_domain_page(p_guest);
- if ( p_snapshot )
- unmap_domain_page(p_snapshot);
-
- return errors;
-}
-
-#define FAILPT(_f, _a...) \
- do { \
- printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
- errors++; \
- } while ( 0 )
-
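-/*
- * Audit a shadow L2 page against the guest L2 it shadows: verify page
- * ownership, the shadow-linear and per-domain Xen entries, and every
- * guest-visible PDE via check_pte().
- */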
-static int check_l2_table(
- struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
-{
- struct domain *d = v->domain;
- l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
- l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
- l2_pgentry_t match;
- int i;
- int errors = 0;
- int limit;
-
- if ( !oos_pdes && (page_get_owner(mfn_to_page(gmfn)) != d) )
- FAILPT("domain doesn't own page");
- if ( oos_pdes && (page_get_owner(mfn_to_page(gmfn)) != NULL) )
- FAILPT("bogus owner for snapshot page");
- if ( page_get_owner(mfn_to_page(smfn)) != NULL )
- FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
- smfn, page_get_owner(mfn_to_page(smfn))->domain_id);
-
-#if 0
- if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
- DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
- {
- for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
- i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
- i++ )
- printk("+++ (%d) %lx %lx\n",i,
- l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
- FAILPT("hypervisor entries inconsistent");
- }
-
- if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
- FAILPT("hypervisor linear map inconsistent");
-#endif
-
- match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
- if ( !shadow_mode_external(d) &&
- l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
- match, PAGE_FLAG_MASK))
- {
- FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
- l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
- L2_PAGETABLE_SHIFT]),
- l2e_get_intpte(match));
- }
-
- match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
- if ( !shadow_mode_external(d) &&
- l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
- match, PAGE_FLAG_MASK))
- {
- FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
- l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
- d->arch.mm_perdomain_pt,
- l2e_get_intpte(match));
- }
-
-#if CONFIG_PAGING_LEVELS == 2
- if ( shadow_mode_external(d) )
- limit = L2_PAGETABLE_ENTRIES;
- else
- limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
- limit = 0; /* XXX x86/64 XXX */
-#endif
-
- /* Check the whole L2. */
- for ( i = 0; i < limit; i++ )
- errors += check_pte(v,
- (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
- (l1_pgentry_t*)(&spl2e[i]),
- NULL,
- 2, i, 0);
-
- unmap_domain_page(spl2e);
- unmap_domain_page(gpl2e);
-
-#if 1
- if ( errors )
- printk("check_l2_table returning %d errors\n", errors);
-#endif
-
- return errors;
-}
-#undef FAILPT
-
-int _check_pagetable(struct vcpu *v, char *s)
-{
- struct domain *d = v->domain;
-#if CONFIG_PAGING_LEVELS == 4
- pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
- v->arch.guest_table : v->arch.guest_table_user);
-#else
- pagetable_t pt = v->arch.guest_table;
-#endif
- unsigned long gptbase = pagetable_get_paddr(pt);
- unsigned long ptbase_pfn, smfn;
- unsigned long i;
- l2_pgentry_t *gpl2e, *spl2e;
- unsigned long ptbase_mfn = 0;
- int errors = 0, limit, oos_pdes = 0;
-
- //_audit_domain(d, AUDIT_QUIET);
- shadow_lock(d);
-
- sh_check_name = s;
- //SH_VVLOG("%s-PT Audit", s);
- sh_l2_present = sh_l1_present = 0;
- perfc_incrc(check_pagetable);
-
- ptbase_mfn = gptbase >> PAGE_SHIFT;
- ptbase_pfn = mfn_to_gmfn(d, ptbase_mfn);
-
- if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
- {
- printk("%s-PT %lx not shadowed\n", s, gptbase);
- goto out;
- }
- if ( page_out_of_sync(mfn_to_page(ptbase_mfn)) )
- {
- ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
- oos_pdes = 1;
- ASSERT(ptbase_mfn);
- }
-
- errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
-
- gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
- spl2e = (l2_pgentry_t *) map_domain_page(smfn);
-
- /* Go back and recurse. */
-#if CONFIG_PAGING_LEVELS == 2
- if ( shadow_mode_external(d) )
- limit = L2_PAGETABLE_ENTRIES;
- else
- limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
- limit = 0; /* XXX x86/64 XXX */
-#endif
-
- for ( i = 0; i < limit; i++ )
- {
- unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
- unsigned long gl1mfn = gmfn_to_mfn(d, gl1pfn);
- unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
-
- if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
- {
- errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
- }
- }
-
- unmap_domain_page(spl2e);
- unmap_domain_page(gpl2e);
-
-#if 0
- SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
- sh_l2_present, sh_l1_present);
-#endif
-
- out:
- if ( errors )
- BUG();
-
- shadow_unlock(d);
-
- return errors;
-}
-
-int _check_all_pagetables(struct vcpu *v, char *s)
-{
- struct domain *d = v->domain;
- int i;
- struct shadow_status *a;
- unsigned long gmfn;
- int errors = 0;
-
- shadow_status_noswap = 1;
-
- sh_check_name = s;
- SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
- sh_l2_present = sh_l1_present = 0;
- perfc_incrc(check_all_pagetables);
-
- for (i = 0; i < shadow_ht_buckets; i++)
- {
- a = &d->arch.shadow_ht[i];
- while ( a && a->gpfn_and_flags )
- {
- gmfn = gmfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
-
- switch ( a->gpfn_and_flags & PGT_type_mask )
- {
- case PGT_l1_shadow:
- errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
- gmfn, a->smfn, 0);
- break;
- case PGT_l2_shadow:
- errors += check_l2_table(v, gmfn, a->smfn,
- page_out_of_sync(mfn_to_page(gmfn)));
- break;
- case PGT_l3_shadow:
- case PGT_l4_shadow:
- case PGT_hl2_shadow:
- BUG(); // XXX - ought to fix this...
- break;
- case PGT_snapshot:
- case PGT_writable_pred:
- break;
- default:
- errors++;
- printk("unexpected shadow type %lx, gpfn=%lx, "
- "gmfn=%lx smfn=%lx\n",
- a->gpfn_and_flags & PGT_type_mask,
- a->gpfn_and_flags & PGT_mfn_mask,
- gmfn, a->smfn);
- BUG();
- }
- a = a->next;
- }
- }
-
- shadow_status_noswap = 0;
-
- if ( errors )
- BUG();
-
- return errors;
-}
-
-#endif // SHADOW_DEBUG
-#endif // this code has not been updated for 32pae & 64 bit modes
-
-#if CONFIG_PAGING_LEVELS >= 3
-/****************************************************************************/
-/* 64-bit shadow-mode code testing */
-/****************************************************************************/
-/*
- * init_bl2() is for a 32-bit VMX guest on a 64-bit host.
- * It uses 1 shadow L4 (doubling as the L3) and 4 shadow L2s to simulate the
- * guest L2.
- */
-static inline unsigned long init_bl2(
- struct domain *d, unsigned long gpfn, unsigned long gmfn)
-{
- unsigned int count;
- unsigned long sl2mfn;
- unsigned long smfn;
- struct page_info *page;
- l4_pgentry_t *spl4e;
- void *l2;
-
- if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
- {
- printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
- /* XXX Deal gracefully with failure. */
- domain_crash_synchronous();
- }
-
- spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
- /* Map the self entry; the L4 and L3 share the same page */
- spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
-
- /* Allocate 4 shadow L2s */
- page = alloc_domheap_pages(NULL, SL2_ORDER, 0);
- if ( !page )
- domain_crash_synchronous();
-
- for ( count = 0; count < PAE_L3_PAGETABLE_ENTRIES; count++ )
- {
- sl2mfn = page_to_mfn(page+count);
- l2 = map_domain_page(sl2mfn);
- memset(l2, 0, PAGE_SIZE);
- unmap_domain_page(l2);
- spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT);
- }
-
- unmap_domain_page(spl4e);
-
- return smfn;
-}
-
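-/*
- * Build the top-level shadow for a 32-bit PAE guest: allocate an
- * L4-type shadow page (the L4 and L3 roles share one page), record the
- * guest's cr3 index in the page's tlbflush_timestamp, and install the
- * self-mapping entry.
- */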
-static inline unsigned long init_l3(
- struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
-{
- unsigned long smfn;
- l4_pgentry_t *spl4e;
- unsigned long index;
-
- if ( unlikely(!(smfn = alloc_shadow_page(v->domain, gpfn, gmfn, PGT_l4_shadow))) )
- {
- printk("Couldn't alloc an L4 shadow for pfn= %lx mfn= %lx\n", gpfn, gmfn);
- BUG(); /* XXX Deal gracefully wiht failure. */
- }
-
- /* Map the self entry; the L4 and L3 share the same page */
- spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
- /*
- * The shadow L4's page_info->tlbflush_timestamp is reused to store
- * its own cr3 index value.
- */
-
- index = get_cr3_idxval(v);
- frame_table[smfn].tlbflush_timestamp = index;
-
- memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
- spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
- unmap_domain_page(spl4e);
- return smfn;
-}
-#endif
-
-#if CONFIG_PAGING_LEVELS == 3
-static unsigned long shadow_l3_table(
- struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
-{
- unsigned long smfn;
- l3_pgentry_t *spl3e;
- struct domain *d = v->domain;
-
- perfc_incrc(shadow_l3_table_count);
-
- SH_VVLOG("shadow_l3_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
-
- if ( SH_L1_HAS_NEXT_PAGE &&
- d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- return init_bl2(d, gpfn, gmfn);
- }
-
- if ( SH_GUEST_32PAE &&
- d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- return init_l3(v, gpfn, gmfn);
- }
-
- if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
- {
- printk("Couldn't alloc an L3 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
- BUG(); /* XXX Deal gracefully with failure. */
- }
-
- spl3e = (l3_pgentry_t *)map_domain_page(smfn);
-
- /* Make the self entry */
- spl3e[PAE_SHADOW_SELF_ENTRY] = l3e_from_pfn(smfn, __PAGE_HYPERVISOR);
-
- if ( (PGT_base_page_table == PGT_l3_page_table) &&
- !shadow_mode_external(d) ) {
- int i;
- unsigned long g2mfn, s2mfn;
- l2_pgentry_t *spl2e;
- l3_pgentry_t *gpl3e;
-
- /* Get the top entry */
- gpl3e = (l3_pgentry_t *)map_domain_page(gmfn);
-
- if ( !(l3e_get_flags(gpl3e[L3_PAGETABLE_ENTRIES - 1]) & _PAGE_PRESENT) )
- {
- BUG();
- }
-
- g2mfn = l3e_get_pfn(gpl3e[L3_PAGETABLE_ENTRIES - 1]);
-
- /* NB. g2mfn should be the same as g2pfn */
- if (!(s2mfn = __shadow_status(d, g2mfn, PGT_l2_shadow))) {
- if ( unlikely(!(s2mfn =
- alloc_shadow_page(d, g2mfn, g2mfn, PGT_l2_shadow))) ) {
- printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
- g2mfn, g2mfn);
- BUG(); /* XXX Deal gracefully with failure. */
- }
- }
-
- if (!get_shadow_ref(s2mfn))
- BUG();
-
- /* Map shadow L2 into shadow L3 */
- spl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(s2mfn, _PAGE_PRESENT);
- shadow_update_min_max(smfn, L3_PAGETABLE_ENTRIES -1);
-
- /*
- * Xen private mappings. Do the similar things as
- * create_pae_xen_mappings().
- */
- spl2e = (l2_pgentry_t *)map_domain_page(s2mfn);
-
- /*
- * When we free L2 pages, we need to be able to tell whether the page
- * contains Xen private mappings; record that in the va_mask part of
- * type_info.
- */
- mfn_to_page(s2mfn)->u.inuse.type_info |=
- (unsigned long) 3 << PGT_score_shift;
-
- memset(spl2e, 0,
- (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)) * sizeof(l2_pgentry_t));
-
- memcpy(&spl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
- &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
- L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
-
- for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
- spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- l2e_from_page(
- virt_to_page(page_get_owner(mfn_to_page(gmfn))->arch.mm_perdomain_pt) + i,
- __PAGE_HYPERVISOR);
- for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- spl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
- (l3e_get_flags(gpl3e[i]) & _PAGE_PRESENT) ?
- l2e_from_pfn(l3e_get_pfn(gpl3e[i]), __PAGE_HYPERVISOR) :
- l2e_empty();
-
- unmap_domain_page(spl2e);
- unmap_domain_page(gpl3e);
- }
- unmap_domain_page(spl3e);
-
- return smfn;
-}
-#endif /* CONFIG_PAGING_LEVELS == 3 */
-
-#if (!defined(GUEST_PGENTRY_32) && !defined(GUEST_32PAE))
-static unsigned long gva_to_gpa_pae(unsigned long gva)
-{
- BUG();
- return 43;
-}
-#endif
-
-#if CONFIG_PAGING_LEVELS == 4
-static unsigned long shadow_l4_table(
- struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
-{
- unsigned long smfn;
- l4_pgentry_t *spl4e;
- struct domain *d = v->domain;
-
- SH_VVLOG("shadow_l4_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
-
- perfc_incrc(shadow_l4_table_count);
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- return init_bl2(d, gpfn, gmfn);
- }
-
- if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- return init_l3(v, gpfn, gmfn);
- }
-
- if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
- {
- printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
- BUG(); /* XXX Deal gracefully with failure. */
- }
-
- spl4e = (l4_pgentry_t *)map_domain_page(smfn);
-
- /* Install hypervisor and 4x linear p.t. mappings. */
- if ( (PGT_base_page_table == PGT_l4_page_table) &&
- !shadow_mode_external(d) )
- {
- /*
- * We could proactively fill in PDEs for pages that are already
- * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
- * (restriction required for coherence of the accessed bit). However,
- * we tried it and it didn't help performance. This is simpler.
- */
- memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
-
- /* Install hypervisor and 2x linear p.t. mappings. */
- memcpy(&spl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
-
- spl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
- l4e_from_paddr(__pa(page_get_owner(mfn_to_page(gmfn))->arch.mm_perdomain_l3),
- __PAGE_HYPERVISOR);
-
- if ( shadow_mode_translate(d) ) // NB: not external
- {
- spl4e[l4_table_offset(RO_MPT_VIRT_START)] =
- l4e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
- __PAGE_HYPERVISOR);
- }
- else
- spl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
- l4e_from_pfn(gmfn, __PAGE_HYPERVISOR);
-
- } else
- memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
-
- unmap_domain_page(spl4e);
-
- ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn);
- return smfn;
-}
-#endif /* CONFIG_PAGING_LEVELS == 4 */
-
-#if CONFIG_PAGING_LEVELS >= 3
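-/*
- * Revalidate the four top-level (PDPT) entries of a 32-bit PAE guest
- * against the slice of its guest table selected by the cr3 index,
- * dropping shadow references for entries that no longer look like
- * page tables.
- */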
-static void
-update_top_level_shadow(struct vcpu *v, unsigned long smfn)
-{
- unsigned long index = get_cr3_idxval(v);
- pgentry_64_t *sple = (pgentry_64_t *)map_domain_page(smfn);
- pgentry_64_t *gple = (pgentry_64_t *)&v->arch.guest_vtable;
- int i;
-
- for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
- {
- unsigned long gpfn;
-
- /*
- * Looks like it's no longer a page table.
- */
- if ( unlikely(entry_get_value(gple[index*4+i]) & PAE_PDPT_RESERVED) )
- {
- if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(sple[i]));
-
- sple[i] = entry_empty();
- continue;
- }
-
- gpfn = entry_get_pfn(gple[index*4+i]);
-
- if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
- {
- if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(sple[i]));
-
- sple[i] = entry_empty();
- continue;
- }
-
- validate_entry_change(
- v->domain, &gple[index*4+i], &sple[i], PAGING_L3);
- }
-
- unmap_domain_page(sple);
-}
-
-/*
- * validate_bl2e_change()
- * This is for a 32-bit HVM guest on a 64-bit host: it syncs one guest L2
- * entry into the shadow.
- */
-
-static inline void
-validate_bl2e_change(
- struct domain *d,
- guest_root_pgentry_t *new_gle_p,
- pgentry_64_t *shadow_l3,
- int index)
-{
- int sl3_idx, sl2_idx;
- unsigned long sl2mfn, sl1mfn;
- pgentry_64_t *sl2_p;
-
- /* Use the guest L2 PTE index to derive the shadow L3 and L2 indices.
- * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
- */
- sl3_idx = index / (PAGETABLE_ENTRIES / 2);
- sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
-
- sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
- sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
-
- validate_pde_change(
- d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
-
- /* Mapping the second l1 shadow page */
- if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
- sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
- sl2_p[sl2_idx + 1] =
- entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
- }
- else
- sl2_p[sl2_idx + 1] = (pgentry_64_t){0};
- unmap_domain_page(sl2_p);
-
-}
-
-/*
- * This variant of shadow_mark_va_out_of_sync() is for 2MB page shadows.
- */
-static void shadow_mark_va_out_of_sync_2mp(
- struct vcpu *v, unsigned long gpfn, unsigned long mfn, paddr_t writable_pl1e)
-{
- struct out_of_sync_entry *entry =
- shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
-
- entry->writable_pl1e = writable_pl1e;
- ESH_LOG("<shadow_mark_va_out_of_sync_2mp> gpfn = %lx\n", gpfn);
- if ( !get_shadow_ref(writable_pl1e >> L1_PAGETABLE_SHIFT) )
- BUG();
-}
-
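-/*
- * Look up the shadow page of type 'flag' for gpfn, allocating one if it
- * does not exist yet.  Returns 1 if a new shadow page was allocated,
- * 0 if gpfn was already shadowed.
- */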
-static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag)
-{
- unsigned long gmfn;
- if ( !(*spmfn = __shadow_status(d, gpfn, flag)) )
- {
- /* This is NOT already shadowed so we need to shadow it. */
- SH_VVLOG("<get_shadow_mfn>: not shadowed");
-
- gmfn = gmfn_to_mfn(d, gpfn);
- if ( unlikely(!VALID_MFN(gmfn)) )
- {
- // Attempted to use an invalid pfn as a shadow page.
- // XXX this needs to be more graceful!
- BUG();
- }
-
- if ( unlikely(!(*spmfn =
- alloc_shadow_page(d, gpfn, gmfn, flag))) )
- {
- printk("<get_shadow_mfn>Couldn't alloc an shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
- BUG(); /* XXX Need to deal gracefully with failure. */
- }
- switch(flag) {
- case PGT_l1_shadow:
- perfc_incrc(shadow_l1_table_count);
- break;
- case PGT_l2_shadow:
- perfc_incrc(shadow_l2_table_count);
- break;
- case PGT_l3_shadow:
- perfc_incrc(shadow_l3_table_count);
- break;
- case PGT_hl2_shadow:
- perfc_incrc(shadow_hl2_table_count);
- break;
- }
-
- return 1;
- } else {
- /* This L1 is shadowed already, but the L2 entry is missing. */
- SH_VVLOG("4b: was shadowed, l2 missing (%lx)", *spmfn);
- return 0;
- }
-}
-
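-/*
- * Make the shadow table at level 'from' that covers va reachable from the
- * current shadow tables: shadow the guest entry at level 'to' and hook the
- * (possibly freshly allocated) lower-level shadow into it.  The L1-into-L2
- * case is delegated to shadow_map_l1_into_current_l2().
- */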
-static void shadow_map_into_current(struct vcpu *v,
- unsigned long va, unsigned int from, unsigned int to)
-{
- pgentry_64_t gle = {0}, sle;
- unsigned long gpfn, smfn;
-
- if (from == PAGING_L1 && to == PAGING_L2) {
- shadow_map_l1_into_current_l2(va);
- return;
- }
-
- __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | to);
- ASSERT(entry_get_flags(gle) & _PAGE_PRESENT);
- gpfn = entry_get_pfn(gle);
-
- get_shadow_mfn(v->domain, gpfn, &smfn, shadow_level_to_type(from));
-
- if ( !get_shadow_ref(smfn) )
- BUG();
- entry_general(v->domain, &gle, &sle, smfn, to);
- __rw_entry(v, va, &gle, GUEST_ENTRY | SET_ENTRY | to);
- __rw_entry(v, va, &sle, SHADOW_ENTRY | SET_ENTRY | to);
-}
-
-/*
- * shadow_set_lxe should be put in shadow.h
- */
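-/*
- * Install sl2e in the shadow tables that map va.  When create_l2_shadow is
- * set, missing shadow L4/L3 levels are forced into place via
- * shadow_map_into_current(); put_ref_check drops the duplicate reference
- * when the same L1 shadow is already installed in this slot.
- */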
-static void shadow_set_l2e_64(unsigned long va, l2_pgentry_t sl2e,
- int create_l2_shadow, int put_ref_check)
-{
- struct vcpu *v = current;
- l4_pgentry_t sl4e;
- l3_pgentry_t sl3e;
-
- __shadow_get_l4e(v, va, &sl4e);
- if (!(l4e_get_flags(sl4e) & _PAGE_PRESENT)) {
- if (create_l2_shadow) {
- perfc_incrc(shadow_set_l3e_force_map);
- shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
- __shadow_get_l4e(v, va, &sl4e);
- } else {
- printk("For non HVM shadow, create_l1_shadow:%d\n", create_l2_shadow);
- }
- }
-
- __shadow_get_l3e(v, va, &sl3e);
- if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) {
- if (create_l2_shadow) {
- perfc_incrc(shadow_set_l2e_force_map);
- shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
- __shadow_get_l3e(v, va, &sl3e);
- } else {
- printk("For non HVM shadow, create_l1_shadow:%d\n", create_l2_shadow);
- }
-
- if ( v->domain->arch.ops->guest_paging_levels == PAGING_L4 )
- shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
- }
-
- if ( put_ref_check ) {
- l2_pgentry_t tmp_sl2e;
- if ( __shadow_get_l2e(v, va, &tmp_sl2e) ) {
- if ( l2e_get_flags(tmp_sl2e) & _PAGE_PRESENT )
- if ( l2e_get_pfn(tmp_sl2e) == l2e_get_pfn(sl2e) ) {
- put_shadow_ref(l2e_get_pfn(sl2e));
- }
- }
-
- }
-
- if (! __shadow_set_l2e(v, va, &sl2e))
- BUG();
- shadow_update_min_max(l3e_get_pfn(sl3e), l2_table_offset(va));
-}
-
-
-/* Since 32-bit guests don't support 4MB pages yet, we don't need to
- * compile this function twice (once per guest paging mode).
- */
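-/*
- * Handle a fault on a guest 2MB (PSE) mapping: update the accessed/dirty
- * bits in the guest PDE, find or allocate the PGT_fl1_shadow page that
- * splinters the superpage into 512 4KB shadow PTEs, hook it into the
- * shadow L2, and then fill in the individual shadow L1 entries.
- */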
-static inline int l2e_rw_fault(
- struct vcpu *v, l2_pgentry_t *gl2e_p, unsigned long va, int rw)
-{
- struct domain *d = v->domain;
- l2_pgentry_t gl2e = *gl2e_p;
- l2_pgentry_t tmp_l2e = gl2e;
- unsigned long start_gpfn = l2e_get_pfn(gl2e);
- unsigned long gpfn, mfn;
- unsigned long l1_mfn, gmfn;
- l1_pgentry_t *l1_p;
- l1_pgentry_t sl1e;
- l1_pgentry_t old_sl1e;
- l2_pgentry_t sl2e;
-#ifdef __x86_64__
- u64 nx = 0;
-#endif
- int put_ref_check = 0;
- /* Check if gpfn is 2M aligned */
-
- /* Update guest l2e */
- if (rw) {
- ASSERT(l2e_get_flags(gl2e) & _PAGE_RW);
- l2e_add_flags(gl2e, _PAGE_DIRTY | _PAGE_ACCESSED);
- } else {
- l2e_add_flags(gl2e, _PAGE_ACCESSED);
- }
-
- l2e_remove_flags(tmp_l2e, _PAGE_PSE);
- if (l2e_get_flags(gl2e) & _PAGE_NX) {
- l2e_remove_flags(tmp_l2e, _PAGE_NX);
-#ifdef __x86_64__
- nx = PGT_high_mfn_nx;
-#endif
- }
-
-
- /* Get the shadow l2 first */
- if ( !__shadow_get_l2e(v, va, &sl2e) )
- sl2e = l2e_empty();
-
-#ifdef __x86_64__
- l1_mfn = __shadow_status(d, start_gpfn | nx, PGT_fl1_shadow);
-#else
- l1_mfn = __shadow_status(d, start_gpfn, PGT_fl1_shadow);
-#endif
-
- /* Check the corresponding l2e */
- if (l1_mfn) {
- /* Why is it already PRESENT? */
- if ((l2e_get_flags(sl2e) & _PAGE_PRESENT) &&
- l2e_get_pfn(sl2e) == l1_mfn) {
- ESH_LOG("sl2e PRSENT bit is set: %lx, l1_mfn = %lx\n", l2e_get_pfn(sl2e), l1_mfn);
- } else {
- put_ref_check = 1;
- if (!get_shadow_ref(l1_mfn))
- BUG();
- }
- l1_p = (l1_pgentry_t *)map_domain_page(l1_mfn);
- sl2e = l2e_from_pfn(l1_mfn, l2e_get_flags(tmp_l2e));
- } else {
- /* Allocate a new page as the shadow page table if needed */
- gmfn = gmfn_to_mfn(d, start_gpfn);
-#ifdef __x86_64__
- l1_mfn = alloc_shadow_page(d, start_gpfn | nx, gmfn, PGT_fl1_shadow);
-#else
- l1_mfn = alloc_shadow_page(d, start_gpfn, gmfn, PGT_fl1_shadow);
-#endif
- if (unlikely(!l1_mfn)) {
- BUG();
- }
-
- if (!get_shadow_ref(l1_mfn))
- BUG();
- l1_p = (l1_pgentry_t *)map_domain_page(l1_mfn );
- sl2e = l2e_from_pfn(l1_mfn, l2e_get_flags(tmp_l2e));
- memset(l1_p, 0, PAGE_SIZE);
- ESH_LOG("Alloc a shadow page: %lx\n", l1_mfn);
- }
-
- ESH_LOG("<%s>: sl2e = %lx\n", __func__, l2e_get_intpte(sl2e));
- /* Map the page into the shadow L2 */
- shadow_set_l2e_64(va, sl2e, 1, put_ref_check);
-
- if (l2e_get_flags(gl2e) & _PAGE_NX)
- l2e_add_flags(tmp_l2e, _PAGE_NX);
-
- /* Propagate into the shadow page table, i.e. set the sl1e entries */
- for (gpfn = start_gpfn;
- gpfn < (start_gpfn + L1_PAGETABLE_ENTRIES); gpfn++) {
-
- mfn = gmfn_to_mfn(d, gpfn);
-
- if ( unlikely(!VALID_MFN(mfn)) )
- {
- continue;
- }
-
- sl1e = l1e_from_pfn(mfn, l2e_get_flags(tmp_l2e));
-
- if (!rw) {
- if ( shadow_mode_log_dirty(d) ||
- !(l2e_get_flags(gl2e) & _PAGE_DIRTY) || mfn_is_page_table(mfn) )
- {
- l1e_remove_flags(sl1e, _PAGE_RW);
- }
- } else {
- /* __mark_dirty(d, gmfn); */
- }
- // printk("<%s> gpfn: %lx, mfn: %lx, sl1e: %lx\n", __func__, gpfn, mfn, l1e_get_intpte(sl1e));
- /* The shadow entries need to be set up before shadow_mark_va_out_of_sync(). */
- old_sl1e = l1_p[gpfn - start_gpfn];
-
- if ( l1e_has_changed(old_sl1e, sl1e, _PAGE_RW | _PAGE_PRESENT) )
- {
- if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(sl1e, d) ) {
- ESH_LOG("%lx, mfn: %lx why make me empty, start_pfn: %lx, gpfn: %lx\n", l1e_get_intpte(sl1e),mfn, start_gpfn, gpfn);
- sl1e = l1e_empty();
- }
- if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
- put_page_from_l1e(old_sl1e, d);
- }
-
- if (rw) {
- /* shadow_mark_va_out_of_sync() needs modification for 2MB pages */
- if ( mfn_is_page_table(mfn) )
- shadow_mark_va_out_of_sync_2mp(v, gpfn, mfn,
- l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * (gpfn - start_gpfn)));
- }
-
- l1_p[gpfn - start_gpfn] = sl1e;
- }
-
- unmap_domain_page(l1_p);
- *gl2e_p = gl2e;
- return 1;
-}
-
-/*
- * Check P, R/W, U/S bits in the guest page table.
- * If the fault belongs to guest return 1,
- * else return 0.
- */
-#if defined( GUEST_PGENTRY_32 )
-static inline int guest_page_fault(
- struct vcpu *v,
- unsigned long va, unsigned int error_code,
- guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
-{
- /* The following checks are for a 32-bit guest on a 64-bit host */
-
- __guest_get_l2e(v, va, gpl2e);
-
- /* Check the guest L2 page-table entry first*/
- if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)) )
- return 1;
-
- if ( error_code & ERROR_W )
- {
- if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)) )
- return 1;
- }
-
- if ( error_code & ERROR_U )
- {
- if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)) )
- return 1;
- }
-
- if ( guest_l2e_get_flags(*gpl2e) & _PAGE_PSE )
- {
- printk("None-PAE HVM guests can NOT use PSE, "
- "because we don't support 4MBytes PSE pages.\n");
- printk("remove pae=1 from your config file.\n");
- domain_crash_synchronous();
- return 0;
- }
-
- __guest_get_l1e(v, va, gpl1e);
-
- /* Then check the guest L1 page-table entry */
- if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)) )
- return 1;
-
- if ( error_code & ERROR_W )
- {
- if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)) )
- return 1;
- }
-
- if ( error_code & ERROR_U )
- {
- if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)) )
- return 1;
- }
-
- return 0;
-}
-#else
-static inline int guest_page_fault(
- struct vcpu *v,
- unsigned long va, unsigned int error_code,
- guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
-{
- struct domain *d = v->domain;
- pgentry_64_t gle = { 0 };
- unsigned long gpfn = 0, mfn;
- int i;
- unsigned int base_idx = 0;
- base_idx = get_cr3_idxval(v);
-
- ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
-
-#if CONFIG_PAGING_LEVELS >= 3
- if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) )
- return 1;
-#endif
-
-#if CONFIG_PAGING_LEVELS == 4
- if ( d->arch.ops->guest_paging_levels == PAGING_L4 )
- {
- __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
- if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
- return 1;
-
- if ( error_code & ERROR_W )
- {
- if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) )
- return 1;
- }
-
- if ( error_code & ERROR_U )
- {
- if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
- return 1;
- }
- gpfn = entry_get_pfn(gle);
- }
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 3
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- if ( SH_GUEST_32PAE )
- gpfn = (hvm_get_guest_ctrl_reg(v, 3)) >> PAGE_SHIFT;
- else
- gpfn = pagetable_get_pfn(v->arch.guest_table);
- }
-#endif
-
- for ( i = PAGING_L3; i >= PAGING_L1; i-- )
- {
- pgentry_64_t *lva;
- /*
- * If it's not external mode, then mfn should be machine physical.
- */
- mfn = gmfn_to_mfn(d, gpfn);
-
- lva = (pgentry_64_t *) map_domain_page(mfn);
- gle = lva[guest_table_offset_64(va, i, base_idx)];
-
- unmap_domain_page(lva);
-
- gpfn = entry_get_pfn(gle);
-
- if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
- return 1;
-
- if ( i < PAGING_L3 ||
- d->arch.ops->guest_paging_levels == PAGING_L4 )
- {
- if ( error_code & ERROR_W )
- {
- if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) )
- {
- if ( i == PAGING_L1 )
- if ( gpl1e )
- gpl1e->l1 = gle.lo;
- return 1;
- }
- }
- if ( error_code & ERROR_U )
- {
- if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
- return 1;
- }
- }
-
- if ( i == PAGING_L2 )
- {
- if ( gpl2e )
- gpl2e->l2 = gle.lo;
- if ( likely(entry_get_flags(gle) & _PAGE_PSE) )
- return 0;
- }
-
- if ( i == PAGING_L1 )
- if ( gpl1e )
- gpl1e->l1 = gle.lo;
- }
-
- return 0;
-
-}
-#endif
-
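-/*
- * Page fault handler for the PAE and 64-bit shadow modes: sync the tables
- * that map va, decide whether the fault belongs to the guest, then repair
- * the shadow -- via l1pte_read_fault()/l1pte_write_fault() for 4KB
- * mappings, or via l2e_rw_fault() for 2MB (PSE) mappings.
- */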
-static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- guest_l2_pgentry_t gl2e;
- guest_l1_pgentry_t gl1e, orig_gl1e;
- l1_pgentry_t sl1e;
-
- gl1e = guest_l1e_empty(); gl2e = guest_l2e_empty();
-
- sl1e = l1e_empty();
-
- perfc_incrc(shadow_fault_calls);
-
- ESH_LOG("<shadow_fault_64> va=%lx, rip = %lx, error code = %x\n",
- va, regs->eip, regs->error_code);
-
- /*
- * Don't let someone else take the guest's table pages out-of-sync.
- */
- shadow_lock(d);
-
- /*
- * STEP 1. Check to see if this fault might have been caused by an
- * out-of-sync table page entry, or if we should pass this
- * fault onto the guest.
- */
- __shadow_sync_va(v, va);
-
- /*
- * STEP 2. Check if the fault belongs to guest
- */
- if ( guest_page_fault(v, va, regs->error_code, &gl2e, &gl1e) )
- {
- if ( unlikely(shadow_mode_log_dirty(d)) && l1e_get_intpte(gl1e) != 0 )
- goto check_writeable;
-
- goto fail;
- }
-
- if ( unlikely((guest_l2e_get_flags(gl2e) & _PAGE_PSE)) )
- goto pse;
-
- /*
- * Handle 4K pages here
- */
-check_writeable:
- orig_gl1e = gl1e;
-
- /* Write fault? */
- if ( regs->error_code & 2 )
- {
- int allow_writes = 0;
-
- if ( unlikely(!(guest_l1e_get_flags(gl1e) & _PAGE_RW)) )
- {
- if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gl1e)) )
- {
- allow_writes = 1;
- l1e_add_flags(gl1e, _PAGE_RW);
- }
- else
- {
- /* Write fault on a read-only mapping. */
- SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
- l1e_get_intpte(gl1e));
- perfc_incrc(shadow_fault_bail_ro_mapping);
- goto fail;
- }
- }
-
- if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) )
- {
- SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
- perfc_incrc(write_fault_bail);
- shadow_unlock(d);
- return 0;
- }
-
- if (allow_writes)
- l1e_remove_flags(gl1e, _PAGE_RW);
- }
- else
- {
- if ( !l1pte_read_fault(d, &gl1e, &sl1e) )
- {
- SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
- perfc_incrc(read_fault_bail);
- shadow_unlock(d);
- return 0;
- }
- }
-
- /*
- * STEP 3. Write the modified shadow PTE and guest PTE back to the tables
- */
- if ( l1e_has_changed(orig_gl1e, gl1e, PAGE_FLAG_MASK) )
- {
- if (unlikely(!__guest_set_l1e(v, va, &gl1e)))
- domain_crash_synchronous();
-
- __mark_dirty(d, gmfn_to_mfn(d, l2e_get_pfn(gl2e)));
- }
-
- shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
-
- perfc_incrc(shadow_fault_fixed);
- d->arch.shadow_fault_count++;
-
- shadow_unlock(d);
-
- return EXCRET_fault_fixed;
-
-pse:
- /*
- * Handle 2M pages here
- */
- if ( unlikely(!shadow_mode_external(d)) )
- BUG();
-
- /* Write fault? */
- if ( regs->error_code & 2 )
- {
- if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) )
- {
- goto fail;
- }
- }
- else
- {
- l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
- }
-
- /*
- * STEP 3. Write guest/shadow l2e back
- */
-
- if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) )
- {
- domain_crash_synchronous();
- }
-
- /*
- * Todo: if necessary, record the page table page as dirty
- */
-
- perfc_incrc(shadow_fault_fixed);
- d->arch.shadow_fault_count++;
-
- shadow_unlock(d);
-
- return EXCRET_fault_fixed;
-fail:
- shadow_unlock(d);
- ESH_LOG("Guest fault~~~\n");
- return 0;
-}
-
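-/*
- * INVLPG handler: sync any out-of-sync state for va and clear the shadow
- * L1 entry, dropping the page reference it held in external mode.
- */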
-static void shadow_invlpg_64(struct vcpu *v, unsigned long va)
-{
- struct domain *d = v->domain;
- l1_pgentry_t sl1e, old_sl1e;
-
- shadow_lock(d);
-
- __shadow_sync_va(v, va);
-
- if ( shadow_mode_external(d) && __shadow_get_l1e(v, va, &old_sl1e) )
- if ( l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
- put_page_from_l1e(old_sl1e, d);
-
- sl1e = l1e_empty();
- __shadow_set_l1e(v, va, &sl1e);
-
- shadow_unlock(d);
-}
-
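-/*
- * Translate a guest virtual address to a guest physical address by walking
- * the guest page tables; returns 0 if the walk faults.
- */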
-static unsigned long gva_to_gpa_64(unsigned long gva)
-{
- struct vcpu *v = current;
- guest_l1_pgentry_t gl1e = {0};
- guest_l2_pgentry_t gl2e = {0};
- unsigned long gpa;
-
- if (guest_page_fault(v, gva, 0, &gl2e, &gl1e))
- return 0;
-
- if (guest_l2e_get_flags(gl2e) & _PAGE_PSE)
- gpa = guest_l2e_get_paddr(gl2e) + (gva & ((1 << GUEST_L2_PAGETABLE_SHIFT) - 1));
- else
- gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK);
-
- return gpa;
-}
-
-/*
- * The naming convention of the shadow_ops:
- * MODE_<pgentry size>_<guest paging levels>_HANDLER
- */
-#if (!defined(GUEST_PGENTRY_32) && !defined(GUEST_32PAE))
-struct shadow_ops MODE_64_3_HANDLER = {
- .guest_paging_levels = 3,
- .invlpg = shadow_invlpg_64,
- .fault = shadow_fault_64,
- .update_pagetables = shadow_update_pagetables,
- .sync_all = sync_all,
- .remove_all_write_access = remove_all_write_access,
- .do_update_va_mapping = do_update_va_mapping,
- .mark_mfn_out_of_sync = mark_mfn_out_of_sync,
- .is_out_of_sync = is_out_of_sync,
- .gva_to_gpa = gva_to_gpa_pae,
-};
-
-struct shadow_ops MODE_64_4_HANDLER = {
- .guest_paging_levels = 4,
- .invlpg = shadow_invlpg_64,
- .fault = shadow_fault_64,
- .update_pagetables = shadow_update_pagetables,
- .sync_all = sync_all,
- .remove_all_write_access = remove_all_write_access,
- .do_update_va_mapping = do_update_va_mapping,
- .mark_mfn_out_of_sync = mark_mfn_out_of_sync,
- .is_out_of_sync = is_out_of_sync,
- .gva_to_gpa = gva_to_gpa_64,
-};
-#endif /* GUEST_PGENTRY_32 */
-#endif /* CONFIG_PAGING_LEVELS >= 3 */
-
-
-#if CONFIG_PAGING_LEVELS == 2
-struct shadow_ops MODE_32_2_HANDLER = {
- .guest_paging_levels = 2,
- .invlpg = shadow_invlpg_32,
- .fault = shadow_fault_32,
- .update_pagetables = shadow_update_pagetables,
- .sync_all = sync_all,
- .remove_all_write_access = remove_all_write_access,
- .do_update_va_mapping = do_update_va_mapping,
- .mark_mfn_out_of_sync = mark_mfn_out_of_sync,
- .is_out_of_sync = is_out_of_sync,
- .gva_to_gpa = gva_to_gpa_64,
-};
-#endif
-
-#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) && !defined (GUEST_32PAE) ) || \
- ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )
-
-
-/*
- * Use GUEST_PGENTRY_32 to force PAE_SHADOW_SELF_ENTRY for L4.
- *
- * Very simple shadow code to handle 1:1 direct mapping for guest
- * non-paging code, which is actually running in PAE/vm86 mode with
- * paging enabled.
- *
- * We expect that the top level (L3) page has been allocated and initialized.
- */
-int shadow_direct_map_fault(unsigned long vpa, struct cpu_user_regs *regs)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- l3_pgentry_t sl3e, *sl3e_p;
- l2_pgentry_t sl2e, *sl2e_p;
- l1_pgentry_t sl1e;
- unsigned long mfn, smfn;
- struct page_info *page;
-
- /*
- * If the faulting address is within the MMIO range (there is no valid
- * mfn for it), we carry on handling the #PF as an MMIO access.
- */
- if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN )
- return 0;
-
- shadow_lock(d);
-
- __direct_get_l3e(v, vpa, &sl3e);
-
- if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT) )
- {
- page = alloc_domheap_page(NULL);
- if ( !page )
- goto nomem;
-
- smfn = page_to_mfn(page);
- sl3e = l3e_from_pfn(smfn, _PAGE_PRESENT);
-
- sl3e_p = (l3_pgentry_t *)map_domain_page(smfn);
- memset(sl3e_p, 0, PAGE_SIZE);
- unmap_domain_page(sl3e_p);
-
- __direct_set_l3e(v, vpa, &sl3e);
- }
-
- __direct_get_l2e(v, vpa, &sl2e);
-
- if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
- {
- page = alloc_domheap_page(NULL);
- if ( !page )
- goto nomem;
-
- smfn = page_to_mfn(page);
- sl2e = l2e_from_pfn(smfn, __PAGE_HYPERVISOR | _PAGE_USER);
- sl2e_p = (l2_pgentry_t *)map_domain_page(smfn);
- memset(sl2e_p, 0, PAGE_SIZE);
- unmap_domain_page(sl2e_p);
-
- __direct_set_l2e(v, vpa, &sl2e);
- }
-
- __direct_get_l1e(v, vpa, &sl1e);
-
- if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) )
- {
- sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER);
- __direct_set_l1e(v, vpa, &sl1e);
- }
-
- shadow_unlock(d);
- return EXCRET_fault_fixed;
-
-nomem:
- shadow_direct_map_clean(d);
- domain_crash_synchronous();
-}
-#endif
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */