Diffstat (limited to 'xen/arch/x86/shadow_public.c')
 xen/arch/x86/shadow_public.c | 2143 -------------------------------------
 1 file changed, 0 insertions(+), 2143 deletions(-)
diff --git a/xen/arch/x86/shadow_public.c b/xen/arch/x86/shadow_public.c
deleted file mode 100644
index 40aa22e4ea..0000000000
--- a/xen/arch/x86/shadow_public.c
+++ /dev/null
@@ -1,2143 +0,0 @@
-/******************************************************************************
- * arch/x86/shadow_public.c
- *
- * Copyright (c) 2005 Michael A Fetterman
- * Based on an earlier implementation by Ian Pratt et al
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/domain_page.h>
-#include <asm/shadow.h>
-#include <asm/page.h>
-#include <xen/event.h>
-#include <xen/sched.h>
-#include <xen/trace.h>
-#include <xen/guest_access.h>
-#include <asm/shadow_64.h>
-
-static int alloc_p2m_table(struct domain *d);
-static void free_p2m_table(struct domain *d);
-
-#define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16))
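
The decode above mirrors SHADOW_MAX() from asm/shadow.h, sized for a 32-bit
guest's 1024-entry L1 tables. A minimal sketch of the packing scheme it
assumes (hypothetical EX_* names; the real encode/decode macros live in
shadow.h):

    /* Pack the lowest/highest touched L1 indices into one 32-bit word
     * (stored in page->tlbflush_timestamp).  min sits in the low 16 bits;
     * max is stored inverted, so a freshly zeroed word decodes to the
     * full range [0, L1_PAGETABLE_ENTRIES - 1]. */
    #define EX_ENCODE_MIN_MAX(_min, _max) \
        ((((L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
    #define EX_MIN(_enc)  ((_enc) & 0xffff)
    #define EX_MAX(_enc)  ((L1_PAGETABLE_ENTRIES - 1) - ((_enc) >> 16))

free_shadow_l1_table() below uses this min/max pair to bound its freeing
loop to the entries that were actually shadowed.
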
-
-
-int shadow_direct_map_init(struct domain *d)
-{
- struct page_info *page;
- l3_pgentry_t *root;
-
- if ( !(page = alloc_domheap_pages(NULL, 0, MEMF_dma)) )
- return 0;
-
- root = map_domain_page(page_to_mfn(page));
- memset(root, 0, PAGE_SIZE);
- root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR);
-
- d->arch.phys_table = pagetable_from_page(page);
-
- unmap_domain_page(root);
- return 1;
-}
-
-void shadow_direct_map_clean(struct domain *d)
-{
- unsigned long mfn;
- l2_pgentry_t *l2e;
- l3_pgentry_t *l3e;
- int i, j;
-
- mfn = pagetable_get_pfn(d->arch.phys_table);
-
- /*
- * We may fail very early, before the direct map has been built.
- */
- if ( !mfn )
- return;
-
- l3e = (l3_pgentry_t *)map_domain_page(mfn);
-
- for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
- {
- if ( l3e_get_flags(l3e[i]) & _PAGE_PRESENT )
- {
- l2e = map_domain_page(l3e_get_pfn(l3e[i]));
-
- for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
- {
- if ( l2e_get_flags(l2e[j]) & _PAGE_PRESENT )
- free_domheap_page(mfn_to_page(l2e_get_pfn(l2e[j])));
- }
- unmap_domain_page(l2e);
- free_domheap_page(mfn_to_page(l3e_get_pfn(l3e[i])));
- }
- }
- free_domheap_page(mfn_to_page(mfn));
-
- unmap_domain_page(l3e);
-
- d->arch.phys_table = pagetable_null();
-}
-
-/****************************************************************************/
-/************* export interface functions ***********************************/
-/****************************************************************************/
-void free_shadow_pages(struct domain *d);
-
-int shadow_set_guest_paging_levels(struct domain *d, int levels)
-{
- struct vcpu *v = current;
-
- /*
- * Need to wait for VCPU0 to complete the on-going shadow ops.
- */
-
- if ( v->domain == d && v->vcpu_id )
- return 1;
-
- shadow_lock(d);
-
- switch ( levels ) {
-#if CONFIG_PAGING_LEVELS == 4
- case 4:
- if ( d->arch.ops != &MODE_64_4_HANDLER )
- d->arch.ops = &MODE_64_4_HANDLER;
- shadow_unlock(d);
- return 1;
-#endif
-#if CONFIG_PAGING_LEVELS == 3
- case 3:
- if ( d->arch.ops == NULL ||
- shadow_mode_log_dirty(d) )
- {
- if ( d->arch.ops != &MODE_64_3_HANDLER )
- d->arch.ops = &MODE_64_3_HANDLER;
- }
- else
- {
- if ( d->arch.ops == &MODE_64_2_HANDLER )
- free_shadow_pages(d);
- if ( d->arch.ops != &MODE_64_PAE_HANDLER )
- d->arch.ops = &MODE_64_PAE_HANDLER;
- }
- shadow_unlock(d);
- return 1;
-#endif
-#if CONFIG_PAGING_LEVELS == 4
- case 3:
- if ( d->arch.ops == &MODE_64_2_HANDLER )
- free_shadow_pages(d);
- if ( d->arch.ops != &MODE_64_PAE_HANDLER )
- d->arch.ops = &MODE_64_PAE_HANDLER;
- shadow_unlock(d);
- return 1;
-#endif
- case 2:
-#if CONFIG_PAGING_LEVELS == 2
- if ( d->arch.ops != &MODE_32_2_HANDLER )
- d->arch.ops = &MODE_32_2_HANDLER;
-#elif CONFIG_PAGING_LEVELS >= 3
- if ( d->arch.ops != &MODE_64_2_HANDLER )
- d->arch.ops = &MODE_64_2_HANDLER;
-#endif
- shadow_unlock(d);
- return 1;
- default:
- shadow_unlock(d);
- return 0;
- }
-}
-
-void shadow_invlpg(struct vcpu *v, unsigned long va)
-{
- struct domain *d = current->domain;
- d->arch.ops->invlpg(v, va);
-}
-
-int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
-{
- struct domain *d = current->domain;
- return d->arch.ops->fault(va, regs);
-}
-
-void __update_pagetables(struct vcpu *v)
-{
- struct domain *d = v->domain;
- d->arch.ops->update_pagetables(v);
-}
-
-void __shadow_sync_all(struct domain *d)
-{
- d->arch.ops->sync_all(d);
-}
-
-int shadow_remove_all_write_access(
- struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
-{
- return d->arch.ops->remove_all_write_access(d, readonly_gpfn, readonly_gmfn);
-}
-
-int shadow_do_update_va_mapping(unsigned long va,
- l1_pgentry_t val,
- struct vcpu *v)
-{
- struct domain *d = v->domain;
- return d->arch.ops->do_update_va_mapping(va, val, v);
-}
-
-struct out_of_sync_entry *
-shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
- unsigned long mfn)
-{
- struct domain *d = v->domain;
- return d->arch.ops->mark_mfn_out_of_sync(v, gpfn, mfn);
-}
-
-/*
- * Returns 1 if va's shadow mapping is out-of-sync.
- * Returns 0 otherwise.
- */
-int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
-{
- struct domain *d = v->domain;
- return d->arch.ops->is_out_of_sync(v, va);
-}
-
-unsigned long gva_to_gpa(unsigned long gva)
-{
- struct domain *d = current->domain;
- return d->arch.ops->gva_to_gpa(gva);
-}
-/****************************************************************************/
-/****************************************************************************/
-#if CONFIG_PAGING_LEVELS >= 3
-
-static inline void
-free_shadow_fl1_table(struct domain *d, unsigned long smfn)
-{
- l1_pgentry_t *pl1e = map_domain_page(smfn);
- int i;
-
- for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
- put_page_from_l1e(pl1e[i], d);
-
- unmap_domain_page(pl1e);
-}
-
-/*
- * Free l2, l3, l4 shadow tables
- */
-
-void free_fake_shadow_l2(struct domain *d, unsigned long smfn);
-
-static inline void
-free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
-{
- pgentry_64_t *ple = map_domain_page(smfn);
- int i, external = shadow_mode_external(d);
-
-#if CONFIG_PAGING_LEVELS >= 3
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- struct page_info *page = mfn_to_page(smfn);
- for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
- {
- if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
- free_fake_shadow_l2(d, entry_get_pfn(ple[i]));
- }
-
- page = mfn_to_page(entry_get_pfn(ple[0]));
- free_domheap_pages(page, SL2_ORDER);
- unmap_domain_page(ple);
- }
- else
-#endif
- {
- /*
- * No Xen mappings in external pages
- */
- if ( external )
- {
- for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) {
- if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(ple[i]));
- if (d->arch.ops->guest_paging_levels == PAGING_L3)
- {
-#if CONFIG_PAGING_LEVELS >= 3
- if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 )
-#endif
- break;
- }
- }
- }
- else
- {
- for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
- {
- /*
- * List the skip/break conditions to avoid freeing
- * Xen private mappings.
- */
-#if CONFIG_PAGING_LEVELS == 2
- if ( level == PAGING_L2 && !is_guest_l2_slot(0, i) )
- continue;
-#endif
-#if CONFIG_PAGING_LEVELS == 3
- if ( level == PAGING_L3 && i == L3_PAGETABLE_ENTRIES )
- break;
- if ( level == PAGING_L2 )
- {
- struct page_info *page = mfn_to_page(smfn);
- if ( is_xen_l2_slot(page->u.inuse.type_info, i) )
- continue;
- }
-#endif
-#if CONFIG_PAGING_LEVELS == 4
- if ( level == PAGING_L4 && !is_guest_l4_slot(i))
- continue;
-#endif
- if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(ple[i]));
- }
- }
- unmap_domain_page(ple);
- }
-}
-#endif
-
-#if CONFIG_PAGING_LEVELS == 4
-static void alloc_monitor_pagetable(struct vcpu *v)
-{
- unsigned long mmfn;
- l4_pgentry_t *mpl4e;
- struct page_info *mmfn_info;
- struct domain *d = v->domain;
-
- ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
-
- mmfn_info = alloc_domheap_page(NULL);
- ASSERT( mmfn_info );
- if ( !mmfn_info )
- {
- printk("Failed to allocate monitor pagetable\n");
- domain_crash(v->domain);
- return;
- }
-
- mmfn = page_to_mfn(mmfn_info);
- mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn);
- memcpy(mpl4e, idle_pg_table, PAGE_SIZE);
- mpl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
- l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
-
- /* map the phys_to_machine map into the per domain Read-Only MPT space */
-
- v->arch.monitor_table = pagetable_from_pfn(mmfn);
- v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
- mpl4e[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
-
- if ( v->vcpu_id == 0 )
- alloc_p2m_table(d);
- else
- {
- unsigned long mfn;
-
- mfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table);
- if ( mfn )
- {
- l4_pgentry_t *l4tab;
-
- l4tab = map_domain_page(mfn);
-
- mpl4e[l4_table_offset(RO_MPT_VIRT_START)] =
- l4tab[l4_table_offset(RO_MPT_VIRT_START)];
-
- unmap_domain_page(l4tab);
- }
- }
-}
-
-void free_monitor_pagetable(struct vcpu *v)
-{
- unsigned long mfn;
-
- /*
- * First free the p2m table (only vcpu0 allocates it).
- */
- if ( v->vcpu_id == 0 )
- free_p2m_table(v->domain);
-
- /*
- * Then free the monitor table itself.
- */
- mfn = pagetable_get_pfn(v->arch.monitor_table);
- unmap_domain_page_global(v->arch.monitor_vtable);
- free_domheap_page(mfn_to_page(mfn));
-
- v->arch.monitor_table = pagetable_null();
- v->arch.monitor_vtable = 0;
-}
-#elif CONFIG_PAGING_LEVELS == 3
-static void alloc_monitor_pagetable(struct vcpu *v)
-{
- unsigned long m2mfn, m3mfn;
- l2_pgentry_t *mpl2e;
- l3_pgentry_t *mpl3e;
- struct page_info *m2mfn_info, *m3mfn_info;
- struct domain *d = v->domain;
- int i;
-
- ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
-
- m3mfn_info = alloc_domheap_pages(NULL, 0, MEMF_dma);
- ASSERT( m3mfn_info );
-
- m3mfn = page_to_mfn(m3mfn_info);
- mpl3e = (l3_pgentry_t *) map_domain_page_global(m3mfn);
- memset(mpl3e, 0, L3_PAGETABLE_ENTRIES * sizeof(l3_pgentry_t));
-
- v->arch.monitor_table = pagetable_from_pfn(m3mfn);
- v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e;
-
- m2mfn_info = alloc_domheap_page(NULL);
- ASSERT( m2mfn_info );
-
- m2mfn = page_to_mfn(m2mfn_info);
- mpl2e = (l2_pgentry_t *) map_domain_page(m2mfn);
- memset(mpl2e, 0, PAGE_SIZE);
-
- /* Map L2 page into L3 */
- mpl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(m2mfn, _PAGE_PRESENT);
-
- memcpy(&mpl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
- &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
- L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
-
- for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
- mpl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- l2e_from_page(
- virt_to_page(d->arch.mm_perdomain_pt) + i,
- __PAGE_HYPERVISOR);
- for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- mpl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
- (l3e_get_flags(mpl3e[i]) & _PAGE_PRESENT) ?
- l2e_from_pfn(l3e_get_pfn(mpl3e[i]), __PAGE_HYPERVISOR) :
- l2e_empty();
- for ( i = 0; i < (MACHPHYS_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = l2e_empty();
-
- if ( v->vcpu_id == 0 )
- {
- unsigned long m1mfn;
- l1_pgentry_t *mpl1e;
- struct page_info *m1mfn_info;
-
- /*
- * Two L2 slots are allocated here, giving 4MB of p2m table, so
- * that p2m entries for PCI MMIO pages (notably the Cirrus VGA
- * framebuffer) are guaranteed to be visible to all other vcpus.
- */
- for ( i = 0; i < 2; i++ )
- {
- m1mfn_info = alloc_domheap_page(NULL);
- ASSERT( m1mfn_info );
-
- m1mfn = page_to_mfn(m1mfn_info);
- mpl1e = (l1_pgentry_t *) map_domain_page(m1mfn);
- memset(mpl1e, 0, PAGE_SIZE);
- unmap_domain_page(mpl1e);
-
- /* Map L1 page into L2 */
- mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
- l2e_from_pfn(m1mfn, __PAGE_HYPERVISOR);
- }
-
- alloc_p2m_table(d);
- }
- else
- {
- unsigned long mfn;
-
- mfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table);
- if ( mfn )
- {
- l3_pgentry_t *l3tab, l3e;
- l2_pgentry_t *l2tab;
-
- l3tab = map_domain_page(mfn);
- l3e = l3tab[l3_table_offset(RO_MPT_VIRT_START)];
-
- /*
- * NB: when CONFIG_PAGING_LEVELS == 3,
- * (l3e_get_flags(l3e) & _PAGE_PRESENT) is always true here.
- * alloc_monitor_pagetable should guarantee this.
- */
- if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- BUG();
-
- l2tab = map_domain_page(l3e_get_pfn(l3e));
-
- for ( i = 0; i < (MACHPHYS_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
- mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
- l2tab[l2_table_offset(RO_MPT_VIRT_START) + i];
-
- unmap_domain_page(l2tab);
- unmap_domain_page(l3tab);
- }
- }
-
- unmap_domain_page(mpl2e);
-}
-
-void free_monitor_pagetable(struct vcpu *v)
-{
- unsigned long m2mfn, m3mfn;
- /*
- * First free the p2m table (only vcpu0 allocates it).
- */
- if ( v->vcpu_id == 0 )
- free_p2m_table(v->domain);
-
- m3mfn = pagetable_get_pfn(v->arch.monitor_table);
- m2mfn = l2e_get_pfn(v->arch.monitor_vtable[L3_PAGETABLE_ENTRIES - 1]);
-
- free_domheap_page(mfn_to_page(m2mfn));
- unmap_domain_page_global(v->arch.monitor_vtable);
- free_domheap_page(mfn_to_page(m3mfn));
-
- v->arch.monitor_table = pagetable_null();
- v->arch.monitor_vtable = 0;
-}
-#endif
-
-static void
-shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
-{
- void *snapshot;
-
- if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
- return;
-
- // Clear the out_of_sync bit.
- //
- clear_bit(_PGC_out_of_sync, &mfn_to_page(entry->gmfn)->count_info);
-
- // XXX Need to think about how to protect the domain's
- // information less expensively.
- //
- snapshot = map_domain_page(entry->snapshot_mfn);
- memset(snapshot, 0, PAGE_SIZE);
- unmap_domain_page(snapshot);
-
- put_shadow_ref(entry->snapshot_mfn);
-}
-
-void
-release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
-{
- struct page_info *page;
-
- page = mfn_to_page(entry->gmfn);
-
- // Decrement ref count of guest & shadow pages
- //
- put_page(page);
-
- // Only use entries that have low bits clear...
- //
- if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
- {
- put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
- entry->writable_pl1e = -2;
- }
- else
- ASSERT( entry->writable_pl1e == -1 );
-
- // Free the snapshot
- //
- shadow_free_snapshot(d, entry);
-}
-
-static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
-{
- struct out_of_sync_entry *entry = d->arch.out_of_sync;
- struct out_of_sync_entry **prev = &d->arch.out_of_sync;
- struct out_of_sync_entry *found = NULL;
-
- // NB: Be careful not to call something that manipulates this list
- // while walking it. Collect the results into a separate list
- // first, then walk that list.
- //
- while ( entry )
- {
- if ( entry->gmfn == gmfn )
- {
- // remove from out of sync list
- *prev = entry->next;
-
- // add to found list
- entry->next = found;
- found = entry;
-
- entry = *prev;
- continue;
- }
- prev = &entry->next;
- entry = entry->next;
- }
-
- prev = NULL;
- entry = found;
- while ( entry )
- {
- release_out_of_sync_entry(d, entry);
-
- prev = &entry->next;
- entry = entry->next;
- }
-
- // Add found list to free list
- if ( prev )
- {
- *prev = d->arch.out_of_sync_free;
- d->arch.out_of_sync_free = found;
- }
-}
-
-static inline void
-shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
-{
- if ( !shadow_mode_refcounts(d) )
- return;
-
- ASSERT(mfn_to_page(gmfn)->count_info & PGC_page_table);
-
- if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
- {
- clear_bit(_PGC_page_table, &mfn_to_page(gmfn)->count_info);
-
- if ( page_out_of_sync(mfn_to_page(gmfn)) )
- {
- remove_out_of_sync_entries(d, gmfn);
- }
- }
-}
-
-static inline void
-free_shadow_l1_table(struct domain *d, unsigned long smfn)
-{
- l1_pgentry_t *pl1e = map_domain_page(smfn);
- l1_pgentry_t *pl1e_next = 0, *sl1e_p;
- int i;
- struct page_info *spage = mfn_to_page(smfn);
- u32 min_max = spage->tlbflush_timestamp;
- int min = SHADOW_MIN(min_max);
- int max;
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
- max = SHADOW_MAX_GUEST32(min_max);
- pl1e_next = map_domain_page(smfn + 1);
- }
- else
- max = SHADOW_MAX(min_max);
-
- for ( i = min; i <= max; i++ )
- {
- if ( pl1e_next && i >= L1_PAGETABLE_ENTRIES )
- sl1e_p = &pl1e_next[i - L1_PAGETABLE_ENTRIES];
- else
- sl1e_p = &pl1e[i];
-
- shadow_put_page_from_l1e(*sl1e_p, d);
- *sl1e_p = l1e_empty();
- }
-
- unmap_domain_page(pl1e);
- if ( pl1e_next )
- unmap_domain_page(pl1e_next);
-}
-
-static inline void
-free_shadow_hl2_table(struct domain *d, unsigned long smfn)
-{
- l1_pgentry_t *hl2 = map_domain_page(smfn);
- int i, limit = 0; /* hl2 shadows exist only for 2-level guests */
-
- SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
-
-#if CONFIG_PAGING_LEVELS == 2
- if ( shadow_mode_external(d) )
- limit = L2_PAGETABLE_ENTRIES;
- else
- limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#endif
-
- for ( i = 0; i < limit; i++ )
- {
- if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
- put_page(mfn_to_page(l1e_get_pfn(hl2[i])));
- }
-
- unmap_domain_page(hl2);
-}
-
-static inline void
-free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
-{
- l2_pgentry_t *pl2e = map_domain_page(smfn);
- int i, external = shadow_mode_external(d);
-
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( external || is_guest_l2_slot(type, i) )
- if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
- put_shadow_ref(l2e_get_pfn(pl2e[i]));
-
- if ( (PGT_base_page_table == PGT_l2_page_table) &&
- shadow_mode_translate(d) && !external )
- {
- // free the ref to the hl2
- //
- put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
- }
-
- unmap_domain_page(pl2e);
-}
-
-void free_fake_shadow_l2(struct domain *d, unsigned long smfn)
-{
- pgentry_64_t *ple = map_domain_page(smfn);
- int i;
-
- for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 )
- if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
- put_shadow_ref(entry_get_pfn(ple[i]));
-
- unmap_domain_page(ple);
-}
-
-void free_shadow_page(unsigned long smfn)
-{
- struct page_info *page = mfn_to_page(smfn);
- unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
- struct domain *d = page_get_owner(mfn_to_page(gmfn));
- unsigned long gpfn = mfn_to_gmfn(d, gmfn);
- unsigned long type = page->u.inuse.type_info & PGT_type_mask;
- u64 index = 0;
-
- SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
-
- ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
-#if CONFIG_PAGING_LEVELS >= 4
- if ( type == PGT_fl1_shadow )
- {
- unsigned long mfn;
- mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
- if ( !mfn )
- gpfn |= PGT_high_mfn_nx;
- }
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- if ( type == PGT_l4_shadow )
- index = page->tlbflush_timestamp;
- }
-#endif
-
- delete_shadow_status(d, gpfn, gmfn, type, index);
-
- switch ( type )
- {
- case PGT_l1_shadow:
- perfc_decr(shadow_l1_pages);
- shadow_demote(d, gpfn, gmfn);
- free_shadow_l1_table(d, smfn);
- d->arch.shadow_page_count--;
- break;
-#if CONFIG_PAGING_LEVELS == 2
- case PGT_l2_shadow:
- perfc_decr(shadow_l2_pages);
- shadow_demote(d, gpfn, gmfn);
- free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
- d->arch.shadow_page_count--;
- break;
-
- case PGT_hl2_shadow:
- perfc_decr(hl2_table_pages);
- shadow_demote(d, gpfn, gmfn);
- free_shadow_hl2_table(d, smfn);
- d->arch.hl2_page_count--;
- break;
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- case PGT_l2_shadow:
- case PGT_l3_shadow:
- shadow_demote(d, gpfn, gmfn);
- free_shadow_tables(d, smfn, shadow_type_to_level(type));
- d->arch.shadow_page_count--;
- break;
-
- case PGT_l4_shadow:
- gpfn = gpfn & PGT_mfn_mask;
- if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- /*
- * Since a single PDPT page can have multiple PDPs, it's possible
- * that shadow_demote() has been already called for gmfn.
- */
- if ( mfn_is_page_table(gmfn) )
- shadow_demote(d, gpfn, gmfn);
- } else
- shadow_demote(d, gpfn, gmfn);
-
- free_shadow_tables(d, smfn, shadow_type_to_level(type));
- d->arch.shadow_page_count--;
- break;
-
- case PGT_fl1_shadow:
- free_shadow_fl1_table(d, smfn);
- d->arch.shadow_page_count--;
- break;
-#endif
- case PGT_snapshot:
- perfc_decr(snapshot_pages);
- break;
-
- default:
- printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
- page_to_mfn(page), page->u.inuse.type_info);
- break;
- }
-
- // No TLB flushes are needed the next time this page gets allocated.
- //
- page->tlbflush_timestamp = 0;
- page->u.free.cpumask = CPU_MASK_NONE;
-
- if ( type == PGT_l1_shadow )
- {
- list_add(&page->list, &d->arch.free_shadow_frames);
- perfc_incr(free_l1_pages);
- }
- else
- free_domheap_page(page);
-}
-
-static void
-free_writable_pte_predictions(struct domain *d)
-{
- int i;
- struct shadow_status *x;
-
- for ( i = 0; i < shadow_ht_buckets; i++ )
- {
- u32 count;
- unsigned long *gpfn_list;
-
- /* Skip empty buckets. */
- if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
- continue;
-
- count = 0;
- for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
- count++;
-
- if ( count == 0 )
- continue;
-
- gpfn_list = xmalloc_array(unsigned long, count);
- if ( gpfn_list == NULL )
- continue;
- count = 0;
- for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
- gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
-
- while ( count )
- {
- count--;
- delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
- }
-
- xfree(gpfn_list);
- }
-}
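
free_writable_pte_predictions() snapshots the matching gpfns before deleting
anything because delete_shadow_status() can rewrite the very hash bucket
being walked; free_shadow_pages() below repeats the same collect-then-act
idiom for pinned shadows. A minimal sketch of the pattern (hypothetical
matches()/destroy() helpers):

    /* Pass 1: snapshot the matching keys without mutating the bucket. */
    count = 0;
    for ( x = &bucket; x != NULL; x = x->next )
        if ( matches(x) )
            keys[count++] = x->key;

    /* Pass 2: now the destructive call is safe, even if it reshuffles
     * or frees entries inside the bucket. */
    while ( count )
        destroy(keys[--count]);
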
-
-static void free_shadow_ht_entries(struct domain *d)
-{
- struct shadow_status *x, *n;
-
- SH_VLOG("freed tables count=%d l1=%d l2=%d",
- d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
- perfc_value(shadow_l2_pages));
-
- n = d->arch.shadow_ht_extras;
- while ( (x = n) != NULL )
- {
- d->arch.shadow_extras_count--;
- n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
- xfree(x);
- }
-
- d->arch.shadow_ht_extras = NULL;
- d->arch.shadow_ht_free = NULL;
-
- ASSERT(d->arch.shadow_extras_count == 0);
- SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
-
- if ( d->arch.shadow_dirty_bitmap != NULL )
- {
- xfree(d->arch.shadow_dirty_bitmap);
- d->arch.shadow_dirty_bitmap = NULL;
- d->arch.shadow_dirty_bitmap_size = 0;
- }
-
- xfree(d->arch.shadow_ht);
- d->arch.shadow_ht = NULL;
-}
-
-static void free_out_of_sync_entries(struct domain *d)
-{
- struct out_of_sync_entry *x, *n;
-
- n = d->arch.out_of_sync_extras;
- while ( (x = n) != NULL )
- {
- d->arch.out_of_sync_extras_count--;
- n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
- xfree(x);
- }
-
- d->arch.out_of_sync_extras = NULL;
- d->arch.out_of_sync_free = NULL;
- d->arch.out_of_sync = NULL;
-
- ASSERT(d->arch.out_of_sync_extras_count == 0);
- FSH_LOG("freed extra out_of_sync entries, now %d",
- d->arch.out_of_sync_extras_count);
-}
-
-void free_shadow_pages(struct domain *d)
-{
- int i;
- struct shadow_status *x;
- struct vcpu *v;
- struct list_head *list_ent, *tmp;
-
- /*
- * WARNING! The shadow page table must not currently be in use!
- * e.g., You are expected to have paused the domain and synchronized CR3.
- */
-
- if ( !d->arch.shadow_ht )
- return;
-
- shadow_audit(d, 1);
-
- // first, remove any outstanding refs from out_of_sync entries...
- //
- free_out_of_sync_state(d);
-
- // second, remove any outstanding refs from v->arch.shadow_table
- // and CR3.
- //
- for_each_vcpu(d, v)
- {
- if ( pagetable_get_paddr(v->arch.shadow_table) )
- {
- put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
- v->arch.shadow_table = pagetable_null();
-
- if ( shadow_mode_external(d) )
- {
- if ( v->arch.shadow_vtable )
- unmap_domain_page_global(v->arch.shadow_vtable);
- v->arch.shadow_vtable = NULL;
- }
- }
-
- if ( v->arch.monitor_shadow_ref )
- {
- put_shadow_ref(v->arch.monitor_shadow_ref);
- v->arch.monitor_shadow_ref = 0;
- }
- }
-
-#if CONFIG_PAGING_LEVELS == 2
- // For external shadows, remove the monitor table's refs
- //
- if ( shadow_mode_external(d) )
- {
- for_each_vcpu(d, v)
- {
- l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
-
- if ( mpl2e )
- {
- l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
- l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
-
- if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
- {
- put_shadow_ref(l2e_get_pfn(hl2e));
- mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
- }
- if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
- {
- put_shadow_ref(l2e_get_pfn(smfn));
- mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
- }
- }
- }
- }
-#endif
- // Now, the only refs to shadow pages that are left are from the shadow
- // pages themselves. We just unpin the pinned pages, and the rest
- // should automatically disappear.
- //
- // NB: Beware: each explicitly or implicit call to free_shadow_page
- // can/will result in the hash bucket getting rewritten out from
- // under us... First, collect the list of pinned pages, then
- // free them.
- //
- for ( i = 0; i < shadow_ht_buckets; i++ )
- {
- u32 count;
- unsigned long *mfn_list;
-
- /* Skip empty buckets. */
- if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
- continue;
-
- count = 0;
- for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( MFN_PINNED(x->smfn) )
- count++;
- if ( !count )
- continue;
-
- mfn_list = xmalloc_array(unsigned long, count);
- count = 0;
- for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( MFN_PINNED(x->smfn) )
- mfn_list[count++] = x->smfn;
-
- while ( count )
- {
- shadow_unpin(mfn_list[--count]);
- }
- xfree(mfn_list);
- }
-
- /* Now free the pre-zeroed pages from the domain. */
- list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
- {
- struct page_info *page = list_entry(list_ent, struct page_info, list);
-
- list_del(list_ent);
- perfc_decr(free_l1_pages);
-
- if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
- {
-#if CONFIG_PAGING_LEVELS >= 3
- free_domheap_pages(page, SL1_ORDER);
-#else
- free_domheap_page(page);
-#endif
- }
- else
- free_domheap_page(page);
- }
-
- shadow_audit(d, 0);
-
- SH_LOG("Free shadow table.");
-}
-
-void __shadow_mode_disable(struct domain *d)
-{
- struct vcpu *v;
-#ifndef NDEBUG
- int i;
-#endif
-
- if ( unlikely(!shadow_mode_enabled(d)) )
- return;
-
- free_shadow_pages(d);
- free_writable_pte_predictions(d);
-
-#ifndef NDEBUG
- for ( i = 0; i < shadow_ht_buckets; i++ )
- {
- if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
- {
- printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%"PRIx64"\n",
- __FILE__, i, (u64)d->arch.shadow_ht[i].gpfn_and_flags);
- BUG();
- }
- }
-#endif
-
- d->arch.shadow_mode = 0;
-
- free_shadow_ht_entries(d);
- free_out_of_sync_entries(d);
-
- for_each_vcpu(d, v)
- update_pagetables(v);
-}
-
-
-int __shadow_mode_enable(struct domain *d, unsigned int mode)
-{
- struct vcpu *v;
- int new_modes = (mode & ~d->arch.shadow_mode);
-#if defined(CONFIG_PAGING_LEVELS)
- int initial_paging_levels = 3;
-#endif
-
- // Gotta be adding something to call this function.
- ASSERT(new_modes);
-
- // can't take anything away by calling this function.
- ASSERT(!(d->arch.shadow_mode & ~mode));
-
-#if defined(CONFIG_PAGING_LEVELS)
- if ( CONFIG_PAGING_LEVELS == 2 )
- initial_paging_levels = CONFIG_PAGING_LEVELS;
- if ( !shadow_set_guest_paging_levels(d,
- initial_paging_levels) ) {
- printk("Unsupported guest paging levels\n");
- domain_crash_synchronous(); /* need to take a clean path */
- }
-#endif
-
- for_each_vcpu(d, v)
- {
- invalidate_shadow_ldt(v);
-
- // We need to set these up for __update_pagetables().
- // See the comment there.
-
- /*
- * arch.guest_vtable
- */
- if ( v->arch.guest_vtable &&
- (v->arch.guest_vtable != __linear_l2_table) )
- {
- unmap_domain_page_global(v->arch.guest_vtable);
- }
- if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
- v->arch.guest_vtable = __linear_l2_table;
- else
- v->arch.guest_vtable = NULL;
-
- /*
- * arch.shadow_vtable
- */
- if ( v->arch.shadow_vtable &&
- (v->arch.shadow_vtable != __shadow_linear_l2_table) )
- {
- unmap_domain_page_global(v->arch.shadow_vtable);
- }
- if ( !(mode & SHM_external) && d->arch.ops->guest_paging_levels == 2)
- v->arch.shadow_vtable = __shadow_linear_l2_table;
- else
- v->arch.shadow_vtable = NULL;
-
-#if CONFIG_PAGING_LEVELS == 2
- /*
- * arch.hl2_vtable
- */
- if ( v->arch.hl2_vtable &&
- (v->arch.hl2_vtable != __linear_hl2_table) )
- {
- unmap_domain_page_global(v->arch.hl2_vtable);
- }
- if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
- v->arch.hl2_vtable = __linear_hl2_table;
- else
- v->arch.hl2_vtable = NULL;
-#endif
- /*
- * arch.monitor_table & arch.monitor_vtable
- */
- if ( v->arch.monitor_vtable )
- {
- free_monitor_pagetable(v);
- }
- if ( mode & SHM_external )
- {
- alloc_monitor_pagetable(v);
- }
- }
-
- if ( new_modes & SHM_enable )
- {
- ASSERT( !d->arch.shadow_ht );
- d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
- if ( d->arch.shadow_ht == NULL )
- goto nomem;
-
- memset(d->arch.shadow_ht, 0,
- shadow_ht_buckets * sizeof(struct shadow_status));
- }
-
- if ( new_modes & SHM_log_dirty )
- {
- ASSERT( !d->arch.shadow_dirty_bitmap );
- d->arch.shadow_dirty_bitmap_size =
- (d->shared_info->arch.max_pfn + 63) & ~63;
- d->arch.shadow_dirty_bitmap =
- xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
- (8 * sizeof(unsigned long)));
- if ( d->arch.shadow_dirty_bitmap == NULL )
- {
- d->arch.shadow_dirty_bitmap_size = 0;
- goto nomem;
- }
- memset(d->arch.shadow_dirty_bitmap, 0,
- d->arch.shadow_dirty_bitmap_size/8);
- }
-
- if ( new_modes & SHM_translate )
- {
- if ( !(new_modes & SHM_external) )
- {
- ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
- if ( !alloc_p2m_table(d) )
- {
- printk("alloc_p2m_table failed (out-of-memory?)\n");
- goto nomem;
- }
- }
- }
-
- // Get rid of any shadow pages from any previous shadow mode.
- //
- free_shadow_pages(d);
-
- d->arch.shadow_mode = mode;
-
- if ( shadow_mode_refcounts(d) )
- {
- struct list_head *list_ent;
- struct page_info *page;
-
- /*
- * Tear down its counts by disassembling its page-table-based
- * refcounts; also remove CR3's gcount/tcount. That leaves things
- * like GDTs, LDTs and external refs intact.
- *
- * Most pages will be writable with tcount=0.
- * Some will still be L1 or L2 with tcount=0.
- * Maybe some pages will be type none with tcount=0.
- * Pages granted external writable refs (via grant tables?) will
- * still have a non-zero tcount. That's OK.
- *
- * gcounts will generally be 1 for PGC_allocated.
- * GDTs and LDTs will have additional gcounts.
- * Any grant-table based refs will still be in the gcount.
- *
- * We attempt to grab a writable ref to each page (thus setting its
- * type), then immediately put those type refs back.
- *
- * Assert that no pages are left with L1/L2/L3/L4 type.
- */
- audit_adjust_pgtables(d, -1, 1);
-
-
- for (list_ent = d->page_list.next; list_ent != &d->page_list;
- list_ent = page->list.next) {
-
- page = list_entry(list_ent, struct page_info, list);
- if ( !get_page_type(page, PGT_writable_page) )
- BUG();
- put_page_type(page);
- /*
- * We use tlbflush_timestamp as a back pointer to the smfn, and
- * need to clean it up here.
- */
- if (shadow_mode_external(d))
- page->tlbflush_timestamp = 0;
- }
-
- audit_adjust_pgtables(d, 1, 1);
-
- }
-
- return 0;
-
- nomem:
- if ( (new_modes & SHM_enable) )
- {
- xfree(d->arch.shadow_ht);
- d->arch.shadow_ht = NULL;
- }
- if ( (new_modes & SHM_log_dirty) )
- {
- xfree(d->arch.shadow_dirty_bitmap);
- d->arch.shadow_dirty_bitmap = NULL;
- }
-
- return -ENOMEM;
-}
-
-
-int shadow_mode_enable(struct domain *d, unsigned int mode)
-{
- int rc;
- shadow_lock(d);
- rc = __shadow_mode_enable(d, mode);
- shadow_unlock(d);
- return rc;
-}
-
-static int shadow_mode_table_op(
- struct domain *d, dom0_shadow_control_t *sc)
-{
- unsigned int op = sc->op;
- int i, rc = 0;
- struct vcpu *v;
-
- ASSERT(shadow_lock_is_acquired(d));
-
- SH_VLOG("shadow mode table op %lx %lx count %d",
- (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
- (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
- d->arch.shadow_page_count);
-
- shadow_audit(d, 1);
-
- switch ( op )
- {
- case DOM0_SHADOW_CONTROL_OP_FLUSH:
- free_shadow_pages(d);
-
- d->arch.shadow_fault_count = 0;
- d->arch.shadow_dirty_count = 0;
-
- break;
-
- case DOM0_SHADOW_CONTROL_OP_CLEAN:
- free_shadow_pages(d);
-
- sc->stats.fault_count = d->arch.shadow_fault_count;
- sc->stats.dirty_count = d->arch.shadow_dirty_count;
-
- d->arch.shadow_fault_count = 0;
- d->arch.shadow_dirty_count = 0;
-
- if ( guest_handle_is_null(sc->dirty_bitmap) ||
- (d->arch.shadow_dirty_bitmap == NULL) )
- {
- rc = -EINVAL;
- break;
- }
-
- if ( sc->pages > d->arch.shadow_dirty_bitmap_size )
- sc->pages = d->arch.shadow_dirty_bitmap_size;
-
-#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
- for ( i = 0; i < sc->pages; i += chunk )
- {
- int bytes = ((((sc->pages - i) > chunk) ?
- chunk : (sc->pages - i)) + 7) / 8;
-
- if ( copy_to_guest_offset(
- sc->dirty_bitmap, i/(8*sizeof(unsigned long)),
- d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- (bytes+sizeof(unsigned long)-1) / sizeof(unsigned long)) )
- {
- rc = -EINVAL;
- break;
- }
- memset(
- d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
- 0, bytes);
- }
-
- break;
-
- case DOM0_SHADOW_CONTROL_OP_PEEK:
- sc->stats.fault_count = d->arch.shadow_fault_count;
- sc->stats.dirty_count = d->arch.shadow_dirty_count;
-
- if ( guest_handle_is_null(sc->dirty_bitmap) ||
- (d->arch.shadow_dirty_bitmap == NULL) )
- {
- rc = -EINVAL;
- break;
- }
-
- if ( sc->pages > d->arch.shadow_dirty_bitmap_size )
- sc->pages = d->arch.shadow_dirty_bitmap_size;
-
- if ( copy_to_guest(sc->dirty_bitmap,
- d->arch.shadow_dirty_bitmap,
- (((sc->pages+7)/8)+sizeof(unsigned long)-1) /
- sizeof(unsigned long)) )
- {
- rc = -EINVAL;
- break;
- }
-
- break;
-
- default:
- rc = -EINVAL;
- break;
- }
-
- SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
- shadow_audit(d, 1);
-
- for_each_vcpu(d,v)
- __update_pagetables(v);
-
- return rc;
-}
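
The CLEAN path above drains the dirty bitmap in 8192-bit (1kB) chunks so
each copy/memset stays L1-cache sized. The loop mixes three units, which is
worth spelling out (a worked example, assuming a 64-bit unsigned long):

    /* For one iteration starting at page index i:
     *   i                       -- bit offset into the bitmap (1 bit/page)
     *   i / (8*sizeof(long))    -- the same offset in unsigned-long units
     *   bytes                   -- ceil(bits_this_chunk / 8)
     *   copy length             -- ceil(bytes / sizeof(long)) longs
     * e.g. sc->pages = 10000: iteration 0 copies 1024 bytes (128 longs);
     * iteration 1 covers the remaining 1808 bits = 226 bytes = 29 longs. */
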
-
-int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
-{
- unsigned int op = sc->op;
- int rc = 0;
- struct vcpu *v;
-
- if ( unlikely(d == current->domain) )
- {
- DPRINTK("Don't try to do a shadow op on yourself!\n");
- return -EINVAL;
- }
-
- domain_pause(d);
-
- shadow_lock(d);
-
- switch ( op )
- {
- case DOM0_SHADOW_CONTROL_OP_OFF:
- if ( shadow_mode_enabled(d) )
- {
- __shadow_sync_all(d);
- __shadow_mode_disable(d);
- }
- break;
-
- case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
- free_shadow_pages(d);
- rc = __shadow_mode_enable(d, SHM_enable);
- break;
-
- case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
- free_shadow_pages(d);
- rc = __shadow_mode_enable(
- d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
- break;
-
- case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
- free_shadow_pages(d);
- rc = __shadow_mode_enable(
- d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
- break;
-
- default:
- rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
- break;
- }
-
- shadow_unlock(d);
-
- for_each_vcpu(d,v)
- update_pagetables(v);
-
- domain_unpause(d);
-
- return rc;
-}
-
-void shadow_mode_init(void)
-{
-}
-
-int _shadow_mode_refcounts(struct domain *d)
-{
- return shadow_mode_refcounts(d);
-}
-
-static int
-map_p2m_entry(pgentry_64_t *top_tab, unsigned long gpfn, unsigned long mfn)
-{
-#if CONFIG_PAGING_LEVELS >= 4
- pgentry_64_t l4e = { 0 };
- pgentry_64_t *l3tab = NULL;
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- pgentry_64_t l3e = { 0 };
-#endif
- l2_pgentry_t *l2tab = NULL;
- l1_pgentry_t *l1tab = NULL;
- unsigned long *l0tab = NULL;
- l2_pgentry_t l2e = { 0 };
- l1_pgentry_t l1e = { 0 };
- struct page_info *page;
- unsigned long va = RO_MPT_VIRT_START + (gpfn * sizeof(mfn));
-
-#if CONFIG_PAGING_LEVELS >= 4
- l4e = top_tab[l4_table_offset(va)];
- if ( !(entry_get_flags(l4e) & _PAGE_PRESENT) )
- {
- page = alloc_domheap_page(NULL);
- if ( !page )
- goto nomem;
-
- l3tab = map_domain_page(page_to_mfn(page));
- memset(l3tab, 0, PAGE_SIZE);
- l4e = top_tab[l4_table_offset(va)] =
- entry_from_page(page, __PAGE_HYPERVISOR);
- }
- else
- l3tab = map_domain_page(entry_get_pfn(l4e));
-
- l3e = l3tab[l3_table_offset(va)];
- if ( !(entry_get_flags(l3e) & _PAGE_PRESENT) )
- {
- page = alloc_domheap_page(NULL);
- if ( !page )
- goto nomem;
-
- l2tab = map_domain_page(page_to_mfn(page));
- memset(l2tab, 0, PAGE_SIZE);
- l3e = l3tab[l3_table_offset(va)] =
- entry_from_page(page, __PAGE_HYPERVISOR);
- }
- else
- l2tab = map_domain_page(entry_get_pfn(l3e));
-
- unmap_domain_page(l3tab);
-#else
- l3e = top_tab[l3_table_offset(va)];
-
- /*
- * NB: when CONFIG_PAGING_LEVELS == 3,
- * (entry_get_flags(l3e) & _PAGE_PRESENT) is always true here.
- * alloc_monitor_pagetable should guarantee this.
- */
- if ( !(entry_get_flags(l3e) & _PAGE_PRESENT) )
- BUG();
-
- l2tab = map_domain_page(entry_get_pfn(l3e));
-#endif
-
- l2e = l2tab[l2_table_offset(va)];
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- {
- page = alloc_domheap_page(NULL);
- if ( !page )
- goto nomem;
-
- l1tab = map_domain_page(page_to_mfn(page));
- memset(l1tab, 0, PAGE_SIZE);
- l2e = l2tab[l2_table_offset(va)] =
- l2e_from_page(page, __PAGE_HYPERVISOR);
- }
- else
- l1tab = map_domain_page(l2e_get_pfn(l2e));
-
- unmap_domain_page(l2tab);
-
- l1e = l1tab[l1_table_offset(va)];
- if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
- {
- page = alloc_domheap_page(NULL);
- if ( !page )
- goto nomem;
-
- l0tab = map_domain_page(page_to_mfn(page));
- memset(l0tab, 0, PAGE_SIZE);
- l1e = l1tab[l1_table_offset(va)] =
- l1e_from_page(page, __PAGE_HYPERVISOR);
- }
- else
- l0tab = map_domain_page(l1e_get_pfn(l1e));
-
- unmap_domain_page(l1tab);
-
- l0tab[gpfn & ((PAGE_SIZE / sizeof(mfn)) - 1)] = mfn;
-
- unmap_domain_page(l0tab);
-
- return 1;
-
-nomem:
- return 0;
-}
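
map_p2m_entry() treats the p2m as one flat array of mfns based at
RO_MPT_VIRT_START, so the entry for a given gpfn is simply word gpfn of that
array; the walk above only materializes missing intermediate levels on
demand. The index arithmetic, made explicit (512 slots per leaf page with an
8-byte unsigned long, 1024 with a 4-byte one):

    /* Virtual address of the p2m slot for gpfn: */
    va  = RO_MPT_VIRT_START + gpfn * sizeof(mfn);
    /* Slot index within the leaf page, as used on l0tab above: */
    idx = gpfn & ((PAGE_SIZE / sizeof(mfn)) - 1);
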
-
-int
-set_p2m_entry(struct domain *d, unsigned long gpfn, unsigned long mfn,
- struct domain_mmap_cache *l2cache,
- struct domain_mmap_cache *l1cache)
-{
- unsigned long tabmfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table);
- pgentry_64_t *top_tab;
- int ret;
-
- ASSERT(tabmfn != 0);
- ASSERT(shadow_lock_is_acquired(d));
-
- top_tab = map_domain_page_with_cache(tabmfn, l2cache);
-
- if ( !(ret = map_p2m_entry(top_tab, gpfn, mfn)) )
- domain_crash(d);
-
- unmap_domain_page_with_cache(top_tab, l2cache);
-
- return ret;
-}
-
-static int
-alloc_p2m_table(struct domain *d)
-{
- struct list_head *list_ent;
- pgentry_64_t *top_tab = NULL;
- unsigned long gpfn, mfn;
- int ret = 0;
-
- ASSERT( pagetable_get_pfn(d->vcpu[0]->arch.monitor_table) );
-
- top_tab = map_domain_page(
- pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
-
- list_ent = d->page_list.next;
-
- while ( list_ent != &d->page_list )
- {
- struct page_info *page;
-
- page = list_entry(list_ent, struct page_info, list);
- mfn = page_to_mfn(page);
-
- gpfn = get_gpfn_from_mfn(mfn);
-
- if ( !(ret = map_p2m_entry(top_tab, gpfn, mfn)) )
- {
- domain_crash(d);
- break;
- }
-
- list_ent = page->list.next;
- }
-
- unmap_domain_page(top_tab);
-
- return ret;
-}
-
-#if CONFIG_PAGING_LEVELS >= 3
-static void
-free_p2m_table(struct domain *d)
-{
- unsigned long va;
- l1_pgentry_t *l1tab;
- l1_pgentry_t l1e;
- l2_pgentry_t *l2tab;
- l2_pgentry_t l2e;
-#if CONFIG_PAGING_LEVELS >= 3
- l3_pgentry_t *l3tab;
- l3_pgentry_t l3e;
-#endif
-#if CONFIG_PAGING_LEVELS == 4
- int i3;
- l4_pgentry_t *l4tab;
- l4_pgentry_t l4e;
-#endif
-
- ASSERT( pagetable_get_pfn(d->vcpu[0]->arch.monitor_table) );
-
-#if CONFIG_PAGING_LEVELS == 4
- l4tab = map_domain_page(
- pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
-#endif
-#if CONFIG_PAGING_LEVELS == 3
- l3tab = map_domain_page(
- pagetable_get_pfn(d->vcpu[0]->arch.monitor_table));
-
- l3e = l3tab[l3_table_offset(RO_MPT_VIRT_START)];
-
- /*
- * NB: when CONFIG_PAGING_LEVELS == 3,
- * (l3e_get_flags(l3e) & _PAGE_PRESENT) is always true here.
- * alloc_monitor_pagetable should guarantee this.
- */
- if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
- BUG();
-
- l2tab = map_domain_page(l3e_get_pfn(l3e));
-#endif
-
- for ( va = RO_MPT_VIRT_START; va < RO_MPT_VIRT_END; )
- {
-#if CONFIG_PAGING_LEVELS == 4
- l4e = l4tab[l4_table_offset(va)];
-
- if ( l4e_get_flags(l4e) & _PAGE_PRESENT )
- {
- l3tab = map_domain_page(l4e_get_pfn(l4e));
-
- for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; i3++ )
- {
- l3e = l3tab[l3_table_offset(va)];
-
- if ( l3e_get_flags(l3e) & _PAGE_PRESENT )
- {
- int i2;
-
- l2tab = map_domain_page(l3e_get_pfn(l3e));
-
- for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
- {
-#endif
- l2e = l2tab[l2_table_offset(va)];
-
- if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
- {
- int i1;
-
- l1tab = map_domain_page(l2e_get_pfn(l2e));
-
- /*
- * unsigned long phys_to_machine_mapping[]
- */
- for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++ )
- {
- l1e = l1tab[l1_table_offset(va)];
-
- if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
- free_domheap_page(mfn_to_page(l1e_get_pfn(l1e)));
-
- va += PAGE_SIZE;
- }
- unmap_domain_page(l1tab);
- free_domheap_page(mfn_to_page(l2e_get_pfn(l2e)));
- }
- else
- va += PAGE_SIZE * L1_PAGETABLE_ENTRIES;
-
-#if CONFIG_PAGING_LEVELS == 4
- }
- unmap_domain_page(l2tab);
- free_domheap_page(mfn_to_page(l3e_get_pfn(l3e)));
- }
- else
- va += PAGE_SIZE * L1_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES;
- }
- unmap_domain_page(l3tab);
- free_domheap_page(mfn_to_page(l4e_get_pfn(l4e)));
- }
- else
- va += PAGE_SIZE *
- L1_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES * L3_PAGETABLE_ENTRIES;
-#endif
- }
-
-#if CONFIG_PAGING_LEVELS == 4
- unmap_domain_page(l4tab);
-#endif
-#if CONFIG_PAGING_LEVELS == 3
- unmap_domain_page(l3tab);
-#endif
-}
-#endif
-
-void shadow_l1_normal_pt_update(
- struct domain *d,
- paddr_t pa, l1_pgentry_t gpte,
- struct domain_mmap_cache *cache)
-{
- unsigned long sl1mfn;
- l1_pgentry_t *spl1e, spte;
-
- shadow_lock(d);
-
- sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
- if ( sl1mfn )
- {
- SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpde=%" PRIpte,
- (void *)pa, l1e_get_intpte(gpte));
- l1pte_propagate_from_guest(current->domain, gpte, &spte);
-
- spl1e = map_domain_page_with_cache(sl1mfn, cache);
- spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
- unmap_domain_page_with_cache(spl1e, cache);
- }
-
- shadow_unlock(d);
-}
-
-void shadow_l2_normal_pt_update(
- struct domain *d,
- paddr_t pa, l2_pgentry_t gpde,
- struct domain_mmap_cache *cache)
-{
- unsigned long sl2mfn;
- l2_pgentry_t *spl2e;
-
- shadow_lock(d);
-
- sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
- if ( sl2mfn )
- {
- SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
- (void *)pa, l2e_get_intpte(gpde));
- spl2e = map_domain_page_with_cache(sl2mfn, cache);
- validate_pde_change(d, gpde,
- &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
- unmap_domain_page_with_cache(spl2e, cache);
- }
-
- shadow_unlock(d);
-}
-
-#if CONFIG_PAGING_LEVELS >= 3
-void shadow_l3_normal_pt_update(
- struct domain *d,
- paddr_t pa, l3_pgentry_t l3e,
- struct domain_mmap_cache *cache)
-{
- unsigned long sl3mfn;
- pgentry_64_t *spl3e;
-
- shadow_lock(d);
-
- sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
- if ( sl3mfn )
- {
- SH_VVLOG("shadow_l3_normal_pt_update pa=%p, l3e=%" PRIpte,
- (void *)pa, l3e_get_intpte(l3e));
- spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
- validate_entry_change(d, (pgentry_64_t *) &l3e,
- &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)],
- shadow_type_to_level(PGT_l3_shadow));
- unmap_domain_page_with_cache(spl3e, cache);
- }
-
- shadow_unlock(d);
-}
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
-void shadow_l4_normal_pt_update(
- struct domain *d,
- paddr_t pa, l4_pgentry_t l4e,
- struct domain_mmap_cache *cache)
-{
- unsigned long sl4mfn;
- pgentry_64_t *spl4e;
-
- shadow_lock(d);
-
- sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
- if ( sl4mfn )
- {
- SH_VVLOG("shadow_l4_normal_pt_update pa=%p, l4e=%" PRIpte,
- (void *)pa, l4e_get_intpte(l4e));
- spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
- validate_entry_change(d, (pgentry_64_t *)&l4e,
- &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)],
- shadow_type_to_level(PGT_l4_shadow));
- unmap_domain_page_with_cache(spl4e, cache);
- }
-
- shadow_unlock(d);
-}
-#endif
-
-static void
-translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
-{
- int i;
- l1_pgentry_t *l1;
-
- l1 = map_domain_page(l1mfn);
- for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
- {
- if ( is_guest_l1_slot(i) &&
- (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
- {
- unsigned long mfn = l1e_get_pfn(l1[i]);
- unsigned long gpfn = mfn_to_gmfn(d, mfn);
- ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
- l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
- }
- }
- unmap_domain_page(l1);
-}
-
-// This is not general enough to handle arbitrary pagetables
-// with shared L1 pages, etc., but it is sufficient for bringing
-// up dom0.
-//
-void
-translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
- unsigned int type)
-{
- int i;
- l2_pgentry_t *l2;
-
- ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
-
- l2 = map_domain_page(l2mfn);
- for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
- {
- if ( is_guest_l2_slot(type, i) &&
- (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
- {
- unsigned long mfn = l2e_get_pfn(l2[i]);
- unsigned long gpfn = mfn_to_gmfn(d, mfn);
- ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
- l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
- translate_l1pgtable(d, p2m, mfn);
- }
- }
- unmap_domain_page(l2);
-}
-
-void
-remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
-{
- unsigned long smfn;
-
- shadow_lock(d);
-
- while ( stype >= PGT_l1_shadow )
- {
- smfn = __shadow_status(d, gpfn, stype);
- if ( smfn && MFN_PINNED(smfn) )
- shadow_unpin(smfn);
- stype -= PGT_l1_shadow;
- }
-
- shadow_unlock(d);
-}
-
-unsigned long
-get_mfn_from_gpfn_foreign(struct domain *d, unsigned long gpfn)
-{
- unsigned long va, tabpfn;
- l1_pgentry_t *l1, l1e;
- l2_pgentry_t *l2, l2e;
-#if CONFIG_PAGING_LEVELS >= 4
- pgentry_64_t *l4 = NULL;
- pgentry_64_t l4e = { 0 };
-#endif
- pgentry_64_t *l3 = NULL;
- pgentry_64_t l3e = { 0 };
- unsigned long *l0tab = NULL;
- unsigned long mfn;
-
- ASSERT(shadow_mode_translate(d));
-
- perfc_incrc(get_mfn_from_gpfn_foreign);
-
- va = RO_MPT_VIRT_START + (gpfn * sizeof(mfn));
-
- tabpfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table);
- if ( !tabpfn )
- return INVALID_MFN;
-
-#if CONFIG_PAGING_LEVELS >= 4
- l4 = map_domain_page(tabpfn);
- l4e = l4[l4_table_offset(va)];
- unmap_domain_page(l4);
- if ( !(entry_get_flags(l4e) & _PAGE_PRESENT) )
- return INVALID_MFN;
-
- l3 = map_domain_page(entry_get_pfn(l4e));
-#else
- l3 = map_domain_page(tabpfn);
-#endif
- l3e = l3[l3_table_offset(va)];
- unmap_domain_page(l3);
- if ( !(entry_get_flags(l3e) & _PAGE_PRESENT) )
- return INVALID_MFN;
- l2 = map_domain_page(entry_get_pfn(l3e));
- l2e = l2[l2_table_offset(va)];
- unmap_domain_page(l2);
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- return INVALID_MFN;
-
- l1 = map_domain_page(l2e_get_pfn(l2e));
- l1e = l1[l1_table_offset(va)];
- unmap_domain_page(l1);
- if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
- return INVALID_MFN;
-
- l0tab = map_domain_page(l1e_get_pfn(l1e));
- mfn = l0tab[gpfn & ((PAGE_SIZE / sizeof (mfn)) - 1)];
- unmap_domain_page(l0tab);
- return mfn;
-}
-
-static u32 remove_all_access_in_page(
- struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
-{
- l1_pgentry_t *pl1e = map_domain_page(l1mfn);
- l1_pgentry_t match, ol1e;
- unsigned long flags = _PAGE_PRESENT;
- int i;
- u32 count = 0;
- int is_l1_shadow =
- ((mfn_to_page(l1mfn)->u.inuse.type_info & PGT_type_mask) ==
- PGT_l1_shadow);
-
- match = l1e_from_pfn(forbidden_gmfn, flags);
-
- for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
- {
- if ( l1e_has_changed(pl1e[i], match, flags) )
- continue;
-
- ol1e = pl1e[i];
- pl1e[i] = l1e_empty();
- count++;
-
- if ( is_l1_shadow )
- shadow_put_page_from_l1e(ol1e, d);
- else /* must be an hl2 page */
- put_page(mfn_to_page(forbidden_gmfn));
- }
-
- unmap_domain_page(pl1e);
-
- return count;
-}
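
The scan above builds a template l1e from forbidden_gmfn and uses
l1e_has_changed() as an equality test restricted to the frame number plus
the requested flags: an entry survives the continue exactly when it is a
present mapping of the forbidden frame. Schematically (assuming the usual
xor-and-mask definition of l1e_has_changed):

    /* "unchanged" relative to match means:
     *   ((intpte(pl1e[i]) ^ intpte(match)) &
     *    ((PADDR_MASK & PAGE_MASK) | _PAGE_PRESENT)) == 0
     * i.e. it maps forbidden_gmfn and the present bit is set. */
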
-
-static u32 __shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
-{
- int i;
- struct shadow_status *a;
- u32 count = 0;
-
- if ( unlikely(!shadow_mode_enabled(d)) )
- return 0;
-
- ASSERT(shadow_lock_is_acquired(d));
- perfc_incrc(remove_all_access);
-
- for (i = 0; i < shadow_ht_buckets; i++)
- {
- a = &d->arch.shadow_ht[i];
- while ( a && a->gpfn_and_flags )
- {
- switch (a->gpfn_and_flags & PGT_type_mask)
- {
- case PGT_l1_shadow:
- case PGT_l2_shadow:
- case PGT_l3_shadow:
- case PGT_l4_shadow:
- case PGT_hl2_shadow:
- count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
- break;
- case PGT_snapshot:
- case PGT_writable_pred:
- // these can't hold refs to the forbidden page
- break;
- default:
- BUG();
- }
-
- a = a->next;
- }
- }
-
- return count;
-}
-
-void shadow_drop_references(
- struct domain *d, struct page_info *page)
-{
- if ( likely(!shadow_mode_refcounts(d)) ||
- ((page->u.inuse.type_info & PGT_count_mask) == 0) )
- return;
-
- /* XXX This needs more thought... */
- printk("%s: needing to call __shadow_remove_all_access for mfn=%lx\n",
- __func__, page_to_mfn(page));
- printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
- page->count_info, page->u.inuse.type_info);
-
- shadow_lock(d);
- __shadow_remove_all_access(d, page_to_mfn(page));
- shadow_unlock(d);
-
- printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
- page->count_info, page->u.inuse.type_info);
-}
-
-/* XXX Needs more thought. Neither pretty nor fast: a place holder. */
-void shadow_sync_and_drop_references(
- struct domain *d, struct page_info *page)
-{
- if ( likely(!shadow_mode_refcounts(d)) )
- return;
-
- shadow_lock(d);
-
- if ( page_out_of_sync(page) )
- __shadow_sync_mfn(d, page_to_mfn(page));
-
- __shadow_remove_all_access(d, page_to_mfn(page));
-
- shadow_unlock(d);
-}
-
-void clear_all_shadow_status(struct domain *d)
-{
- struct vcpu *v = current;
-
- /*
- * Don't clean up while other vcpus are working.
- */
- if ( v->vcpu_id )
- return;
-
- shadow_lock(d);
-
- free_shadow_pages(d);
- free_shadow_ht_entries(d);
- d->arch.shadow_ht =
- xmalloc_array(struct shadow_status, shadow_ht_buckets);
- if ( d->arch.shadow_ht == NULL ) {
- printk("clear all shadow status:xmalloc fail\n");
- domain_crash_synchronous();
- }
- memset(d->arch.shadow_ht, 0,
- shadow_ht_buckets * sizeof(struct shadow_status));
-
- free_out_of_sync_entries(d);
-
- shadow_unlock(d);
-}
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */