author     Tim Deegan <Tim.Deegan@xensource.com>    2006-12-20 12:03:07 +0000
committer  Tim Deegan <Tim.Deegan@xensource.com>    2006-12-20 12:03:07 +0000
commit     1128c254f3ddece0fc98b98f30c37c7d83b251f3 (patch)
tree       04cc143e88859f7fe486f95df373dee2318348dd
parent     6f7ff73d5777b3aec9b7fdadfe0869ca1df280aa (diff)
[XEN] Clean up the shadow interface
Remove a lot of unnecessary things from shadow.h, and move the shadow lock
entirely inside the shadow code.

Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
-rw-r--r--  xen/arch/x86/domain.c            |  19
-rw-r--r--  xen/arch/x86/domain_build.c      |   2
-rw-r--r--  xen/arch/x86/mm.c                | 199
-rw-r--r--  xen/arch/x86/mm/shadow/common.c  | 282
-rw-r--r--  xen/arch/x86/mm/shadow/multi.c   |  53
-rw-r--r--  xen/arch/x86/mm/shadow/multi.h   |   4
-rw-r--r--  xen/arch/x86/mm/shadow/private.h | 160
-rw-r--r--  xen/arch/x86/mm/shadow/types.h   |   4
-rw-r--r--  xen/include/asm-x86/mm.h         |   2
-rw-r--r--  xen/include/asm-x86/shadow.h     | 468
10 files changed, 577 insertions, 616 deletions
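Before the per-file hunks below, here is a minimal sketch of the new write path for guest pagetable entries once the shadow lock lives entirely inside the shadow code. It is simplified from the update_intpte()/shadow_write_guest_entry() code in this diff, omits the PTE_UPDATE_WITH_CMPXCHG variant, and the helper name write_guest_pte is invented for illustration; it is not part of the patch.

    /* Sketch only: the shadow lock and the revalidation of shadows are now
     * handled inside shadow_write_guest_entry(); callers in mm.c just pick
     * the right writer for the domain's mode. */
    static inline int write_guest_pte(struct vcpu *v, intpte_t *p,
                                      intpte_t new, unsigned long gmfn)
    {
        if ( shadow_mode_enabled(v->domain) )
            /* Takes the shadow lock, writes the entry, calls
             * sh_validate_guest_entry(), then unlocks. */
            return shadow_write_guest_entry(v, p, new, _mfn(gmfn));

        /* Non-shadowed domain: a plain fault-safe write is enough. */
        return !__copy_to_user(p, &new, sizeof(new));
    }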
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e5897454fa..afa2f60f08 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -172,10 +172,11 @@ int arch_domain_create(struct domain *d)
{
#ifdef __x86_64__
struct page_info *pg;
+ int i;
#endif
l1_pgentry_t gdt_l1e;
int vcpuid, pdpt_order;
- int i, rc = -ENOMEM;
+ int rc = -ENOMEM;
pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
@@ -218,12 +219,7 @@ int arch_domain_create(struct domain *d)
#endif /* __x86_64__ */
- shadow_lock_init(d);
- for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
- INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
- INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
- INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
- INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
+ shadow_domain_init(d);
if ( !is_idle_domain(d) )
{
@@ -366,15 +362,6 @@ int arch_set_info_guest(
v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
}
- /* Shadow: make sure the domain has enough shadow memory to
- * boot another vcpu */
- if ( shadow_mode_enabled(d)
- && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) )
- {
- destroy_gdt(v);
- return -ENOMEM;
- }
-
if ( v->vcpu_id == 0 )
update_domain_wallclock_time(d);
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index c092149def..236bc5be46 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -827,7 +827,7 @@ int construct_dom0(struct domain *d,
regs->eflags = X86_EFLAGS_IF;
if ( opt_dom0_shadow )
- if ( shadow_test_enable(d) == 0 )
+ if ( shadow_enable(d, SHM2_enable) == 0 )
shadow_update_paging_modes(v);
if ( supervisor_mode_kernel )
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 9c932e9686..6ed7d84490 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -365,6 +365,38 @@ void write_ptbase(struct vcpu *v)
write_cr3(v->arch.cr3);
}
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ *
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Update ref counts to shadow tables appropriately.
+ */
+void update_cr3(struct vcpu *v)
+{
+ unsigned long cr3_mfn=0;
+
+ if ( shadow_mode_enabled(v->domain) )
+ {
+ shadow_update_cr3(v);
+ return;
+ }
+
+#if CONFIG_PAGING_LEVELS == 4
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+ else
+#endif
+ cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+ make_cr3(v, cr3_mfn);
+}
+
+
void invalidate_shadow_ldt(struct vcpu *v)
{
int i;
@@ -1160,53 +1192,57 @@ static void free_l4_table(struct page_info *page)
#endif
-static inline int update_l1e(l1_pgentry_t *pl1e,
- l1_pgentry_t ol1e,
- l1_pgentry_t nl1e,
- unsigned long gl1mfn,
- struct vcpu *v)
+
+/* How to write an entry to the guest pagetables.
+ * Returns 0 for failure (pointer not valid), 1 for success. */
+static inline int update_intpte(intpte_t *p,
+ intpte_t old,
+ intpte_t new,
+ unsigned long mfn,
+ struct vcpu *v)
{
int rv = 1;
- if ( unlikely(shadow_mode_enabled(v->domain)) )
- shadow_lock(v->domain);
#ifndef PTE_UPDATE_WITH_CMPXCHG
- rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
+ if ( unlikely(shadow_mode_enabled(v->domain)) )
+ rv = shadow_write_guest_entry(v, p, new, _mfn(mfn));
+ else
+ rv = (!__copy_to_user(p, &new, sizeof(new)));
#else
{
- intpte_t o = l1e_get_intpte(ol1e);
- intpte_t n = l1e_get_intpte(nl1e);
-
+ intpte_t t = old;
for ( ; ; )
{
- if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+ if ( unlikely(shadow_mode_enabled(v->domain)) )
+ rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
+ else
+ rv = (!cmpxchg_user(p, t, new));
+
+ if ( unlikely(rv == 0) )
{
MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
- ": saw %" PRIpte,
- l1e_get_intpte(ol1e),
- l1e_get_intpte(nl1e),
- o);
- rv = 0;
+ ": saw %" PRIpte, old, new, t);
break;
}
- if ( o == l1e_get_intpte(ol1e) )
+ if ( t == old )
break;
/* Allowed to change in Accessed/Dirty flags only. */
- BUG_ON((o ^ l1e_get_intpte(ol1e)) &
- ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
- ol1e = l1e_from_intpte(o);
+ BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
+
+ old = t;
}
}
#endif
- if ( unlikely(shadow_mode_enabled(v->domain)) && rv )
- {
- shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
- shadow_unlock(v->domain);
- }
return rv;
}
+/* Macro that wraps the appropriate type-changes around update_intpte().
+ * Arguments are: type, ptr, old, new, mfn, vcpu */
+#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \
+ update_intpte((intpte_t *)(_p), \
+ _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
+ (_m), (_v))
/* Update the L1 entry at pl1e to new value nl1e. */
static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
@@ -1219,7 +1255,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
return 0;
if ( unlikely(shadow_mode_refcounts(d)) )
- return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
+ return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
@@ -1238,12 +1274,12 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
/* Fast path for identical mapping, r/w and presence. */
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
- return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
+ return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
return 0;
- if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
+ if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
{
put_page_from_l1e(nl1e, d);
return 0;
@@ -1251,7 +1287,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
}
else
{
- if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
+ if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
return 0;
}
@@ -1259,36 +1295,6 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
return 1;
}
-#ifndef PTE_UPDATE_WITH_CMPXCHG
-#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
-#else
-#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \
- for ( ; ; ) \
- { \
- intpte_t __o = cmpxchg((intpte_t *)(_p), \
- _t ## e_get_intpte(_o), \
- _t ## e_get_intpte(_n)); \
- if ( __o == _t ## e_get_intpte(_o) ) \
- break; \
- /* Allowed to change in Accessed/Dirty flags only. */ \
- BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \
- ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \
- _o = _t ## e_from_intpte(__o); \
- } \
- 1; })
-#endif
-#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \
- int rv; \
- if ( unlikely(shadow_mode_enabled(current->domain)) ) \
- shadow_lock(current->domain); \
- rv = _UPDATE_ENTRY(_t, _p, _o, _n); \
- if ( unlikely(shadow_mode_enabled(current->domain)) ) \
- { \
- shadow_validate_guest_entry(current, _mfn(_m), (_p)); \
- shadow_unlock(current->domain); \
- } \
- rv; \
-})
/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
static int mod_l2_entry(l2_pgentry_t *pl2e,
@@ -1320,18 +1326,18 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
/* Fast path for identical mapping and presence. */
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
- return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
+ return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) )
return 0;
- if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
+ if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
{
put_page_from_l2e(nl2e, pfn);
return 0;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
+ else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
{
return 0;
}
@@ -1381,18 +1387,18 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
/* Fast path for identical mapping and presence. */
if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
- return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
+ return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
return 0;
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
{
put_page_from_l3e(nl3e, pfn);
return 0;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
+ else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
{
return 0;
}
@@ -1439,18 +1445,18 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
/* Fast path for identical mapping and presence. */
if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
- return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
+ return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
return 0;
- if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
+ if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
{
put_page_from_l4e(nl4e, pfn);
return 0;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
+ else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
{
return 0;
}
@@ -2292,15 +2298,11 @@ int do_mmu_update(
break;
if ( unlikely(shadow_mode_enabled(d)) )
- shadow_lock(d);
-
- *(intpte_t *)va = req.val;
- okay = 1;
-
- if ( unlikely(shadow_mode_enabled(d)) )
+ okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn));
+ else
{
- shadow_validate_guest_entry(v, _mfn(mfn), va);
- shadow_unlock(d);
+ *(intpte_t *)va = req.val;
+ okay = 1;
}
put_page_type(page);
@@ -2409,7 +2411,7 @@ static int create_grant_pte_mapping(
}
ol1e = *(l1_pgentry_t *)va;
- if ( !update_l1e(va, ol1e, nl1e, mfn, v) )
+ if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
{
put_page_type(page);
rc = GNTST_general_error;
@@ -2477,7 +2479,7 @@ static int destroy_grant_pte_mapping(
}
/* Delete pagetable entry. */
- if ( unlikely(!update_l1e(
+ if ( unlikely(!UPDATE_ENTRY(l1,
(l1_pgentry_t *)va, ol1e, l1e_empty(), mfn,
d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
{
@@ -2515,7 +2517,7 @@ static int create_grant_va_mapping(
return GNTST_general_error;
}
ol1e = *pl1e;
- okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v);
+ okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
guest_unmap_l1e(v, pl1e);
pl1e = NULL;
@@ -2553,7 +2555,7 @@ static int destroy_grant_va_mapping(
}
/* Delete pagetable entry. */
- if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
+ if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
{
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
rc = GNTST_general_error;
@@ -2952,16 +2954,6 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
UNLOCK_BIGLOCK(d);
- /* If we're doing FAST_FAULT_PATH, then shadow mode may have
- cached the fact that this is an mmio region in the shadow
- page tables. Blow the tables away to remove the cache.
- This is pretty heavy handed, but this is a rare operation
- (it might happen a dozen times during boot and then never
- again), so it doesn't matter too much. */
- shadow_lock(d);
- shadow_blow_tables(d);
- shadow_unlock(d);
-
put_domain(d);
break;
@@ -3188,27 +3180,30 @@ static int ptwr_emulated_update(
pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
if ( do_cmpxchg )
{
- if ( shadow_mode_enabled(d) )
- shadow_lock(d);
+ int okay;
ol1e = l1e_from_intpte(old);
- if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
+
+ if ( shadow_mode_enabled(d) )
+ {
+ intpte_t t = old;
+ okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e,
+ &t, val, _mfn(mfn));
+ okay = (okay && t == old);
+ }
+ else
+ okay = (cmpxchg((intpte_t *)pl1e, old, val) == old);
+
+ if ( !okay )
{
- if ( shadow_mode_enabled(d) )
- shadow_unlock(d);
unmap_domain_page(pl1e);
put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
return X86EMUL_CMPXCHG_FAILED;
}
- if ( unlikely(shadow_mode_enabled(d)) )
- {
- shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
- shadow_unlock(d);
- }
}
else
{
ol1e = *pl1e;
- if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) )
+ if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
BUG();
}
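As an aside on the UPDATE_ENTRY macro used throughout the hunks above: the token-pasting on the type tag selects the matching accessor functions. For example (illustration only, not part of the patch), UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v) expands to:

    update_intpte((intpte_t *)(pl1e),
                  l1e_get_intpte(ol1e), l1e_get_intpte(nl1e),
                  (gl1mfn), (v));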
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 4bb8e3b230..b4275bf8e0 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -38,6 +38,21 @@
#include <asm/shadow.h>
#include "private.h"
+
+/* Set up the shadow-specific parts of a domain struct at start of day.
+ * Called for every domain from arch_domain_create() */
+void shadow_domain_init(struct domain *d)
+{
+ int i;
+ shadow_lock_init(d);
+ for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
+ INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
+ INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
+ INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
+ INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
+}
+
+
#if SHADOW_AUDIT
int shadow_audit_enable = 0;
@@ -434,7 +449,7 @@ void shadow_promote(struct vcpu *v, mfn_t gmfn, unsigned int type)
ASSERT(mfn_valid(gmfn));
/* We should never try to promote a gmfn that has writeable mappings */
- ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
+ ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0);
/* Is the page already shadowed? */
if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
@@ -466,8 +481,7 @@ void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
* Returns a bitmask of SHADOW_SET_* flags. */
int
-__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size)
+sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size)
{
int result = 0;
struct page_info *page = mfn_to_page(gmfn);
@@ -546,22 +560,9 @@ __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
}
-int
-shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
-/* This is the entry point from hypercalls. It returns a bitmask of all the
- * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
-{
- int rc;
-
- ASSERT(shadow_locked_by_me(v->domain));
- rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
- shadow_audit_tables(v);
- return rc;
-}
-
void
-shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size)
+sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size)
/* This is the entry point for emulated writes to pagetables in HVM guests and
* PV translated guests.
*/
@@ -570,7 +571,7 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
int rc;
ASSERT(shadow_locked_by_me(v->domain));
- rc = __shadow_validate_guest_entry(v, gmfn, entry, size);
+ rc = sh_validate_guest_entry(v, gmfn, entry, size);
if ( rc & SHADOW_SET_FLUSH )
/* Need to flush TLBs to pick up shadow PT changes */
flush_tlb_mask(d->domain_dirty_cpumask);
@@ -585,6 +586,38 @@ shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
}
}
+int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
+ intpte_t new, mfn_t gmfn)
+/* Write a new value into the guest pagetable, and update the shadows
+ * appropriately. Returns 0 if we page-faulted, 1 for success. */
+{
+ int failed;
+ shadow_lock(v->domain);
+ failed = __copy_to_user(p, &new, sizeof(new));
+ if ( failed != sizeof(new) )
+ sh_validate_guest_entry(v, gmfn, p, sizeof(new));
+ shadow_unlock(v->domain);
+ return (failed == 0);
+}
+
+int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
+ intpte_t *old, intpte_t new, mfn_t gmfn)
+/* Cmpxchg a new value into the guest pagetable, and update the shadows
+ * appropriately. Returns 0 if we page-faulted, 1 if not.
+ * N.B. caller should check the value of "old" to see if the
+ * cmpxchg itself was successful. */
+{
+ int failed;
+ intpte_t t = *old;
+ shadow_lock(v->domain);
+ failed = cmpxchg_user(p, t, new);
+ if ( t == *old )
+ sh_validate_guest_entry(v, gmfn, p, sizeof(new));
+ *old = t;
+ shadow_unlock(v->domain);
+ return (failed == 0);
+}
+
/**************************************************************************/
/* Memory management for shadow pages. */
@@ -791,7 +824,7 @@ void shadow_prealloc(struct domain *d, unsigned int order)
/* Deliberately free all the memory we can: this will tear down all of
* this domain's shadows */
-void shadow_blow_tables(struct domain *d)
+static void shadow_blow_tables(struct domain *d)
{
struct list_head *l, *t;
struct shadow_page_info *sp;
@@ -989,7 +1022,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
* Also, we only ever allocate a max-order chunk, so as to preserve
* the invariant that shadow_prealloc() always works.
* Returns 0 iff it can't get a chunk (the caller should then
- * free up some pages in domheap and call set_sh_allocation);
+ * free up some pages in domheap and call sh_set_allocation);
* returns non-zero on success.
*/
static int
@@ -1149,14 +1182,14 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
if ( pagetable_get_pfn(v->arch.guest_table)
== pagetable_get_pfn(d->arch.phys_table)
&& v->arch.shadow.mode != NULL )
- v->arch.shadow.mode->update_cr3(v);
+ v->arch.shadow.mode->update_cr3(v, 0);
}
}
#endif
/* The P2M can be shadowed: keep the shadows synced */
if ( d->vcpu[0] != NULL )
- (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn,
- p2m_entry, sizeof *p2m_entry);
+ (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn,
+ p2m_entry, sizeof *p2m_entry);
}
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = sh_map_domain_page(*table_mfn);
@@ -1216,8 +1249,8 @@ shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
/* The P2M can be shadowed: keep the shadows synced */
if ( d->vcpu[0] != NULL )
- (void)__shadow_validate_guest_entry(
- d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry));
+ (void)sh_validate_guest_entry(d->vcpu[0], table_mfn,
+ p2m_entry, sizeof(*p2m_entry));
/* Success */
rv = 1;
@@ -1427,9 +1460,9 @@ static void shadow_p2m_teardown(struct domain *d)
* Input will be rounded up to at least shadow_min_acceptable_pages(),
* plus space for the p2m table.
* Returns 0 for success, non-zero for failure. */
-static unsigned int set_sh_allocation(struct domain *d,
- unsigned int pages,
- int *preempted)
+static unsigned int sh_set_allocation(struct domain *d,
+ unsigned int pages,
+ int *preempted)
{
struct shadow_page_info *sp;
unsigned int lower_bound;
@@ -1499,20 +1532,12 @@ static unsigned int set_sh_allocation(struct domain *d,
return 0;
}
-unsigned int shadow_set_allocation(struct domain *d,
- unsigned int megabytes,
- int *preempted)
-/* Hypercall interface to set the shadow memory allocation */
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static unsigned int shadow_get_allocation(struct domain *d)
{
- unsigned int rv;
- shadow_lock(d);
- rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted);
- SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n",
- d->domain_id,
- d->arch.shadow.total_pages,
- shadow_get_allocation(d));
- shadow_unlock(d);
- return rv;
+ unsigned int pg = d->arch.shadow.total_pages;
+ return ((pg >> (20 - PAGE_SHIFT))
+ + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
}
/**************************************************************************/
@@ -1889,24 +1914,24 @@ void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
* level and fault_addr describe how we found this to be a pagetable;
* level==0 means we have some other reason for revoking write access.*/
-int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn,
- unsigned int level,
- unsigned long fault_addr)
+int sh_remove_write_access(struct vcpu *v, mfn_t gmfn,
+ unsigned int level,
+ unsigned long fault_addr)
{
/* Dispatch table for getting per-type functions */
static hash_callback_t callbacks[16] = {
NULL, /* none */
#if CONFIG_PAGING_LEVELS == 2
- SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32 */
- SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */
#else
- SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32 */
- SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */
#endif
NULL, /* l2_32 */
#if CONFIG_PAGING_LEVELS >= 3
- SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae */
- SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */
#else
NULL, /* l1_pae */
NULL, /* fl1_pae */
@@ -1914,8 +1939,8 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn,
NULL, /* l2_pae */
NULL, /* l2h_pae */
#if CONFIG_PAGING_LEVELS >= 4
- SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */
- SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */
+ SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */
#else
NULL, /* l1_64 */
NULL, /* fl1_64 */
@@ -2077,25 +2102,25 @@ int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn,
/* Remove all mappings of a guest frame from the shadow tables.
* Returns non-zero if we need to flush TLBs. */
-int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
{
struct page_info *page = mfn_to_page(gmfn);
- int expected_count;
+ int expected_count, do_locking;
/* Dispatch table for getting per-type functions */
static hash_callback_t callbacks[16] = {
NULL, /* none */
#if CONFIG_PAGING_LEVELS == 2
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32 */
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */
#else
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32 */
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */
#endif
NULL, /* l2_32 */
#if CONFIG_PAGING_LEVELS >= 3
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae */
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */
#else
NULL, /* l1_pae */
NULL, /* fl1_pae */
@@ -2103,8 +2128,8 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
NULL, /* l2_pae */
NULL, /* l2h_pae */
#if CONFIG_PAGING_LEVELS >= 4
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */
- SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */
+ SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */
#else
NULL, /* l1_64 */
NULL, /* fl1_64 */
@@ -2129,7 +2154,12 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
if ( (page->count_info & PGC_count_mask) == 0 )
return 0;
- ASSERT(shadow_locked_by_me(v->domain));
+ /* Although this is an externally visible function, we do not know
+ * whether the shadow lock will be held when it is called (since it
+ * can be called via put_page_type when we clear a shadow l1e).
+ * If the lock isn't held, take it for the duration of the call. */
+ do_locking = !shadow_locked_by_me(v->domain);
+ if ( do_locking ) shadow_lock(v->domain);
/* XXX TODO:
* Heuristics for finding the (probably) single mapping of this gmfn */
@@ -2154,6 +2184,8 @@ int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
}
}
+ if ( do_locking ) shadow_unlock(v->domain);
+
/* We killed at least one mapping, so must flush TLBs. */
return 1;
}
@@ -2236,9 +2268,10 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
* (all != 0 implies fast == 0)
*/
{
- struct page_info *pg;
+ struct page_info *pg = mfn_to_page(gmfn);
mfn_t smfn;
u32 sh_flags;
+ int do_locking;
unsigned char t;
/* Dispatch table for getting per-type functions: each level must
@@ -2296,15 +2329,19 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
0 /* unused */
};
- ASSERT(shadow_locked_by_me(v->domain));
ASSERT(!(all && fast));
- pg = mfn_to_page(gmfn);
-
/* Bail out now if the page is not shadowed */
if ( (pg->count_info & PGC_page_table) == 0 )
return;
+ /* Although this is an externally visible function, we do not know
+ * whether the shadow lock will be held when it is called (since it
+ * can be called via put_page_type when we clear a shadow l1e).
+ * If the lock isn't held, take it for the duration of the call. */
+ do_locking = !shadow_locked_by_me(v->domain);
+ if ( do_locking ) shadow_lock(v->domain);
+
SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
@@ -2356,14 +2393,16 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
/* Need to flush TLBs now, so that linear maps are safe next time we
* take a fault. */
flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
+ if ( do_locking ) shadow_unlock(v->domain);
}
-void
-shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
+static void
+sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
/* Even harsher: this is an HVM page that we think is no longer a pagetable.
* Unshadow it, and recursively unshadow pages that reference it. */
{
- shadow_remove_all_shadows(v, gmfn);
+ sh_remove_shadows(v, gmfn, 0, 1);
/* XXX TODO:
* Rework this hashtable walker to return a linked-list of all
* the shadows it modified, then do breadth-first recursion
@@ -2376,7 +2415,7 @@ shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
/**************************************************************************/
-void sh_update_paging_modes(struct vcpu *v)
+static void sh_update_paging_modes(struct vcpu *v)
{
struct domain *d = v->domain;
struct shadow_paging_mode *old_mode = v->arch.shadow.mode;
@@ -2394,7 +2433,8 @@ void sh_update_paging_modes(struct vcpu *v)
// First, tear down any old shadow tables held by this vcpu.
//
- shadow_detach_old_tables(v);
+ if ( v->arch.shadow.mode )
+ v->arch.shadow.mode->detach_old_tables(v);
if ( !is_hvm_domain(d) )
{
@@ -2402,10 +2442,9 @@ void sh_update_paging_modes(struct vcpu *v)
/// PV guest
///
#if CONFIG_PAGING_LEVELS == 4
- if ( pv_32bit_guest(v) )
- v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3);
- else
- v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
+ /* When 32-on-64 PV guests are supported, they must choose
+ * a different mode here */
+ v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
#elif CONFIG_PAGING_LEVELS == 3
v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
#elif CONFIG_PAGING_LEVELS == 2
@@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu *v)
if ( pagetable_is_null(v->arch.monitor_table) )
{
- mfn_t mmfn = shadow_make_monitor_table(v);
+ mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v);
v->arch.monitor_table = pagetable_from_mfn(mmfn);
make_cr3(v, mfn_x(mmfn));
hvm_update_host_cr3(v);
@@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu *v)
old_mfn = pagetable_get_mfn(v->arch.monitor_table);
v->arch.monitor_table = pagetable_null();
- new_mfn = v->arch.shadow.mode->make_monitor_table(v);
+ new_mfn = v->arch.shadow.mode->make_monitor_table(v);
v->arch.monitor_table = pagetable_from_mfn(new_mfn);
SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n",
mfn_x(new_mfn));
@@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu *v)
// This *does* happen, at least for CR4.PGE...
}
- v->arch.shadow.mode->update_cr3(v);
+ v->arch.shadow.mode->update_cr3(v, 0);
+}
+
+void shadow_update_paging_modes(struct vcpu *v)
+{
+ shadow_lock(v->domain);
+ sh_update_paging_modes(v);
+ shadow_unlock(v->domain);
}
/**************************************************************************/
@@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 mode)
/* Init the shadow memory allocation if the user hasn't done so */
old_pages = d->arch.shadow.total_pages;
if ( old_pages == 0 )
- if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
+ if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
{
- set_sh_allocation(d, 0, NULL);
+ sh_set_allocation(d, 0, NULL);
rv = -ENOMEM;
goto out;
}
@@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 mode)
/* Init the hash table */
if ( shadow_hash_alloc(d) != 0 )
{
- set_sh_allocation(d, old_pages, NULL);
+ sh_set_allocation(d, old_pages, NULL);
rv = -ENOMEM;
goto out;
}
@@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 mode)
if ( !shadow_alloc_p2m_table(d) )
{
shadow_hash_teardown(d);
- set_sh_allocation(d, old_pages, NULL);
+ sh_set_allocation(d, old_pages, NULL);
shadow_p2m_teardown(d);
rv = -ENOMEM;
goto out;
@@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d)
/* Release the shadow and monitor tables held by each vcpu */
for_each_vcpu(d, v)
{
- shadow_detach_old_tables(v);
- if ( shadow_mode_external(d) )
+ if ( v->arch.shadow.mode )
{
- mfn = pagetable_get_mfn(v->arch.monitor_table);
- if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
- shadow_destroy_monitor_table(v, mfn);
- v->arch.monitor_table = pagetable_null();
+ v->arch.shadow.mode->detach_old_tables(v);
+ if ( shadow_mode_external(d) )
+ {
+ mfn = pagetable_get_mfn(v->arch.monitor_table);
+ if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
+ v->arch.shadow.mode->destroy_monitor_table(v, mfn);
+ v->arch.monitor_table = pagetable_null();
+ }
}
}
}
@@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d)
d->arch.shadow.free_pages,
d->arch.shadow.p2m_pages);
/* Destroy all the shadows and release memory to domheap */
- set_sh_allocation(d, 0, NULL);
+ sh_set_allocation(d, 0, NULL);
/* Release the hash table back to xenheap */
if (d->arch.shadow.hash_table)
shadow_hash_teardown(d);
@@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct domain *d, u32 mode)
if ( d->arch.shadow.mode == 0 )
{
/* Init the shadow memory allocation and the hash table */
- if ( set_sh_allocation(d, 1, NULL) != 0
+ if ( sh_set_allocation(d, 1, NULL) != 0
|| shadow_hash_alloc(d) != 0 )
{
- set_sh_allocation(d, 0, NULL);
+ sh_set_allocation(d, 0, NULL);
return -ENOMEM;
}
}
@@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode)
d->arch.shadow.p2m_pages);
for_each_vcpu(d, v)
{
- shadow_detach_old_tables(v);
+ if ( v->arch.shadow.mode )
+ v->arch.shadow.mode->detach_old_tables(v);
#if CONFIG_PAGING_LEVELS == 4
if ( !(v->arch.flags & TF_kernel_mode) )
make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
@@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode)
}
/* Pull down the memory allocation */
- if ( set_sh_allocation(d, 0, NULL) != 0 )
+ if ( sh_set_allocation(d, 0, NULL) != 0 )
{
// XXX - How can this occur?
// Seems like a bug to return an error now that we've
@@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct domain *d, u32 mode)
}
/* Enable/disable ops for the "test" and "log-dirty" modes */
-int shadow_test_enable(struct domain *d)
+static int shadow_test_enable(struct domain *d)
{
int ret;
@@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d)
return ret;
}
-int shadow_test_disable(struct domain *d)
+static int shadow_test_disable(struct domain *d)
{
int ret;
@@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
if ( v != NULL )
{
- shadow_remove_all_shadows_and_parents(v, _mfn(mfn));
- if ( shadow_remove_all_mappings(v, _mfn(mfn)) )
+ sh_remove_all_shadows_and_parents(v, _mfn(mfn));
+ if ( sh_remove_all_mappings(v, _mfn(mfn)) )
flush_tlb_mask(d->domain_dirty_cpumask);
}
@@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
v = d->vcpu[0];
if ( v != NULL )
{
- shadow_remove_all_shadows_and_parents(v, omfn);
- if ( shadow_remove_all_mappings(v, omfn) )
+ sh_remove_all_shadows_and_parents(v, omfn);
+ if ( sh_remove_all_mappings(v, omfn) )
flush_tlb_mask(d->domain_dirty_cpumask);
}
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
@@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
shadow_set_p2m_entry(d, gfn, _mfn(mfn));
set_gpfn_from_mfn(mfn, gfn);
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
+ /* If we're doing FAST_FAULT_PATH, then shadow mode may have
+ cached the fact that this is an mmio region in the shadow
+ page tables. Blow the tables away to remove the cache.
+ This is pretty heavy handed, but this is a rare operation
+ (it might happen a dozen times during boot and then never
+ again), so it doesn't matter too much. */
+ shadow_blow_tables(d);
+#endif
+
shadow_audit_p2m(d);
shadow_unlock(d);
}
@@ -3130,14 +3191,13 @@ static int shadow_log_dirty_op(
/* Mark a page as dirty */
-void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
+void sh_mark_dirty(struct domain *d, mfn_t gmfn)
{
unsigned long pfn;
ASSERT(shadow_locked_by_me(d));
- ASSERT(shadow_mode_log_dirty(d));
- if ( !mfn_valid(gmfn) )
+ if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
return;
ASSERT(d->arch.shadow.dirty_bitmap != NULL);
@@ -3181,13 +3241,19 @@ void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
}
}
+void shadow_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+ shadow_lock(d);
+ sh_mark_dirty(d, gmfn);
+ shadow_unlock(d);
+}
/**************************************************************************/
/* Shadow-control XEN_DOMCTL dispatcher */
int shadow_domctl(struct domain *d,
- xen_domctl_shadow_op_t *sc,
- XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+ xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
int rc, preempted = 0;
@@ -3233,7 +3299,9 @@ int shadow_domctl(struct domain *d,
return 0;
case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
- rc = shadow_set_allocation(d, sc->mb, &preempted);
+ shadow_lock(d);
+ rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
+ shadow_unlock(d);
if ( preempted )
/* Not finished. Set up to re-run the call. */
rc = hypercall_create_continuation(
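Stepping back from the common.c hunks: the pattern this patch applies to the shadow entry points is that the sh_*() internals assume the shadow lock is already held, while thin shadow_*() wrappers (such as shadow_update_paging_modes() and shadow_mark_dirty() above) take and release it for outside callers. Functions reachable both from inside and outside the shadow code, such as sh_remove_all_mappings() and sh_remove_shadows(), use the conditional-locking idiom seen in the hunks; sketched in isolation (illustrative only) it is:

    /* Take the shadow lock only if the caller does not already hold it,
     * e.g. when reached via put_page_type() as a shadow l1e is cleared. */
    int do_locking = !shadow_locked_by_me(v->domain);
    if ( do_locking ) shadow_lock(v->domain);

    /* ... walk the hash table and adjust the shadows ... */

    if ( do_locking ) shadow_unlock(v->domain);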
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 32af022668..5aabce5469 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
if ( !mfn_valid(gw->l3mfn) ) return 1;
/* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+ if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
flush_tlb_mask(v->domain->domain_dirty_cpumask);
gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
+ guest_l3_table_offset(va);
@@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
if ( !mfn_valid(gw->l2mfn) ) return 1;
/* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+ if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
flush_tlb_mask(v->domain->domain_dirty_cpumask);
gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
+ guest_l2_table_offset(va);
@@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
if ( !mfn_valid(gw->l1mfn) ) return 1;
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op
- && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
+ && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
flush_tlb_mask(v->domain->domain_dirty_cpumask);
gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
+ guest_l1_table_offset(va);
@@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu *v,
u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
/* More than one type bit set in shadow-flags? */
if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
- res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
+ res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
}
/* We should never need to flush the TLB or recopy PAE entries */
@@ -2847,7 +2847,7 @@ static int sh_page_fault(struct vcpu *v,
/* If this is actually a page table, then we have a bug, and need
* to support more operations in the emulator. More likely,
* though, this is a hint that this page should not be shadowed. */
- shadow_remove_all_shadows(v, gmfn);
+ sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
}
/* Emulator has changed the user registers: write back */
@@ -3080,7 +3080,7 @@ sh_update_linear_entries(struct vcpu *v)
sh_unmap_domain_page(ml4e);
}
- /* Shadow l3 tables are made up by update_cr3 */
+ /* Shadow l3 tables are made up by sh_update_cr3 */
sl3e = v->arch.shadow.l3table;
for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
@@ -3118,7 +3118,7 @@ sh_update_linear_entries(struct vcpu *v)
int unmap_l2e = 0;
#if GUEST_PAGING_LEVELS == 2
- /* Shadow l3 tables were built by update_cr3 */
+ /* Shadow l3 tables were built by sh_update_cr3 */
if ( shadow_mode_external(d) )
shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
else
@@ -3341,12 +3341,15 @@ sh_set_toplevel_shadow(struct vcpu *v,
static void
-sh_update_cr3(struct vcpu *v)
+sh_update_cr3(struct vcpu *v, int do_locking)
/* Updates vcpu->arch.cr3 after the guest has changed CR3.
* Paravirtual guests should set v->arch.guest_table (and guest_table_user,
* if appropriate).
* HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
* and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
+ * If do_locking != 0, assume we are being called from outside the
+ * shadow code, and must take and release the shadow lock; otherwise
+ * that is the caller's responsibility.
*/
{
struct domain *d = v->domain;
@@ -3355,6 +3358,15 @@ sh_update_cr3(struct vcpu *v)
u32 guest_idx=0;
#endif
+ /* Don't do anything on an uninitialised vcpu */
+ if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+ {
+ ASSERT(v->arch.cr3 == 0);
+ return;
+ }
+
+ if ( do_locking ) shadow_lock(v->domain);
+
ASSERT(shadow_locked_by_me(v->domain));
ASSERT(v->arch.shadow.mode);
@@ -3400,11 +3412,6 @@ sh_update_cr3(struct vcpu *v)
#endif
gmfn = pagetable_get_mfn(v->arch.guest_table);
- if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
- {
- ASSERT(v->arch.cr3 == 0);
- return;
- }
////
//// vcpu->arch.guest_vtable
@@ -3466,7 +3473,7 @@ sh_update_cr3(struct vcpu *v)
* replace the old shadow pagetable(s), so that we can safely use the
* (old) shadow linear maps in the writeable mapping heuristics. */
#if GUEST_PAGING_LEVELS == 2
- if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 )
+ if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 )
flush_tlb_mask(v->domain->domain_dirty_cpumask);
sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow);
#elif GUEST_PAGING_LEVELS == 3
@@ -3484,7 +3491,7 @@ sh_update_cr3(struct vcpu *v)
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
- flush |= shadow_remove_write_access(v, gl2mfn, 2, 0);
+ flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
}
}
if ( flush )
@@ -3506,7 +3513,7 @@ sh_update_cr3(struct vcpu *v)
}
}
#elif GUEST_PAGING_LEVELS == 4
- if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 )
+ if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 )
flush_tlb_mask(v->domain->domain_dirty_cpumask);
sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
#else
@@ -3582,6 +3589,9 @@ sh_update_cr3(struct vcpu *v)
/* Fix up the linear pagetable mappings */
sh_update_linear_entries(v);
+
+ /* Release the lock, if we took it (otherwise it's the caller's problem) */
+ if ( do_locking ) shadow_unlock(v->domain);
}
@@ -3637,7 +3647,8 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
}
#endif
-int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
+int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn,
+ mfn_t readonly_mfn)
/* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
{
shadow_l1e_t *sl1e;
@@ -3668,7 +3679,7 @@ int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
}
-int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
+int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
/* Excises all mappings to guest frame from this shadow l1 table */
{
shadow_l1e_t *sl1e;
@@ -3888,7 +3899,7 @@ sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
skip = safe_not_to_verify_write(mfn, addr, src, bytes);
memcpy(addr, src, bytes);
- if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes);
+ if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
/* If we are writing zeros to this page, might want to unshadow */
if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
@@ -3933,7 +3944,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
if ( prev == old )
{
- if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes);
+ if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
}
else
rv = X86EMUL_CMPXCHG_FAILED;
@@ -3977,7 +3988,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr,
if ( prev == old )
{
- if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, 8);
+ if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8);
}
else
rv = X86EMUL_CMPXCHG_FAILED;
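The do_locking argument added to sh_update_cr3() in the hunks above follows the same idea: internal callers that already hold the shadow lock pass 0 (as sh_update_paging_modes() does), while external callers pass 1 so that sh_update_cr3() locks for itself. A plausible external-facing wrapper, assumed for illustration since the corresponding shadow.h declaration is not visible in the truncated hunks here, would look like:

    /* Assumed wrapper, consistent with mode->update_cr3(v, 0) being used
     * only by callers that already hold the shadow lock. */
    static inline void shadow_update_cr3(struct vcpu *v)
    {
        v->arch.shadow.mode->update_cr3(v, 1 /* take the shadow lock */);
    }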
diff --git a/xen/arch/x86/mm/shadow/multi.h b/xen/arch/x86/mm/shadow/multi.h
index 2cc61b830f..97903059d4 100644
--- a/xen/arch/x86/mm/shadow/multi.h
+++ b/xen/arch/x86/mm/shadow/multi.h
@@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
(struct vcpu *v, mfn_t sl4mfn);
extern int
-SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
+SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS)
(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
extern int
-SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS)
(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
extern void
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index 62a6364c39..913fa43770 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -33,8 +33,43 @@
/******************************************************************************
+ * Levels of self-test and paranoia
+ */
+
+#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */
+#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */
+#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */
+#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */
+#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */
+#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW_AUDIT 0
+#define SHADOW_AUDIT_ENABLE 0
+#else
+#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */
+#define SHADOW_AUDIT_ENABLE shadow_audit_enable
+extern int shadow_audit_enable;
+#endif
+
+/******************************************************************************
+ * Levels of optimization
+ */
+
+#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
+#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
+#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */
+#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */
+#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */
+#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */
+
+#define SHADOW_OPTIMIZATIONS 0x3f
+
+
+/******************************************************************************
* Debug and error-message output
*/
+
#define SHADOW_PRINTK(_f, _a...) \
debugtrace_printk("sh: %s(): " _f, __func__, ##_a)
#define SHADOW_ERROR(_f, _a...) \
@@ -54,6 +89,58 @@
#define SHADOW_DEBUG_EMULATE 1
#define SHADOW_DEBUG_LOGDIRTY 0
+/******************************************************************************
+ * The shadow lock.
+ *
+ * This lock is per-domain. It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ *
+ * Specifically, it protects:
+ * - all changes to shadow page table pages
+ * - the shadow hash table
+ * - the shadow page allocator
+ * - all changes to guest page table pages
+ * - all changes to the page_info->tlbflush_timestamp
+ * - the page_info->count fields on shadow pages
+ * - the shadow dirty bit array and count
+ */
+#ifndef CONFIG_SMP
+#error shadow.h currently requires CONFIG_SMP
+#endif
+
+#define shadow_lock_init(_d) \
+ do { \
+ spin_lock_init(&(_d)->arch.shadow.lock); \
+ (_d)->arch.shadow.locker = -1; \
+ (_d)->arch.shadow.locker_function = "nobody"; \
+ } while (0)
+
+#define shadow_locked_by_me(_d) \
+ (current->processor == (_d)->arch.shadow.locker)
+
+#define shadow_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
+ { \
+ printk("Error: shadow lock held by %s\n", \
+ (_d)->arch.shadow.locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.shadow.lock); \
+ ASSERT((_d)->arch.shadow.locker == -1); \
+ (_d)->arch.shadow.locker = current->processor; \
+ (_d)->arch.shadow.locker_function = __func__; \
+ } while (0)
+
+#define shadow_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.shadow.locker == current->processor); \
+ (_d)->arch.shadow.locker = -1; \
+ (_d)->arch.shadow.locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.shadow.lock); \
+ } while (0)
+
+
/******************************************************************************
* Auditing routines
@@ -291,6 +378,21 @@ void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
+/* Update the shadows in response to a pagetable write from Xen */
+extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size);
+
+/* Update the shadows in response to a pagetable write from a HVM guest */
+extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+ void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs.
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+ unsigned int level,
+ unsigned long fault_addr);
/******************************************************************************
* Flags used in the return value of the shadow_set_lXe() functions...
@@ -325,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
#undef mfn_valid
#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+
+static inline int
+sh_mfn_is_a_page_table(mfn_t gmfn)
+{
+ struct page_info *page = mfn_to_page(gmfn);
+ struct domain *owner;
+ unsigned long type_info;
+
+ if ( !mfn_valid(gmfn) )
+ return 0;
+
+ owner = page_get_owner(page);
+ if ( owner && shadow_mode_refcounts(owner)
+ && (page->count_info & PGC_page_table) )
+ return 1;
+
+ type_info = page->u.inuse.type_info & PGT_type_mask;
+ return type_info && (type_info <= PGT_l4_page_table);
+}
+
// Provide mfn_t-aware versions of common xen functions
static inline void *
sh_map_domain_page(mfn_t mfn)
@@ -350,6 +472,25 @@ sh_unmap_domain_page_global(void *p)
unmap_domain_page_global(p);
}
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+ return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+ return pagetable_from_pfn(mfn_x(mfn));
+}
+
+
+/******************************************************************************
+ * Log-dirty mode bitmap handling
+ */
+
+extern void sh_mark_dirty(struct domain *d, mfn_t gmfn);
+
static inline int
sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
/* Is this guest page dirty? Call only in log-dirty mode. */
@@ -368,25 +509,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
return 0;
}
-static inline int
-sh_mfn_is_a_page_table(mfn_t gmfn)
-{
- struct page_info *page = mfn_to_page(gmfn);
- struct domain *owner;
- unsigned long type_info;
-
- if ( !mfn_valid(gmfn) )
- return 0;
-
- owner = page_get_owner(page);
- if ( owner && shadow_mode_refcounts(owner)
- && (page->count_info & PGC_page_table) )
- return 1;
-
- type_info = page->u.inuse.type_info & PGT_type_mask;
- return type_info && (type_info <= PGT_l4_page_table);
-}
-
/**************************************************************************/
/* Shadow-page refcounting. */
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 4aed70aa8c..e2edebe555 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -477,8 +477,8 @@ struct shadow_walk_t
#define sh_gva_to_gpa INTERNAL_NAME(sh_gva_to_gpa)
#define sh_gva_to_gfn INTERNAL_NAME(sh_gva_to_gfn)
#define sh_update_cr3 INTERNAL_NAME(sh_update_cr3)
-#define sh_remove_write_access INTERNAL_NAME(sh_remove_write_access)
-#define sh_remove_all_mappings INTERNAL_NAME(sh_remove_all_mappings)
+#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
+#define sh_rm_mappings_from_l1 INTERNAL_NAME(sh_rm_mappings_from_l1)
#define sh_remove_l1_shadow INTERNAL_NAME(sh_remove_l1_shadow)
#define sh_remove_l2_shadow INTERNAL_NAME(sh_remove_l2_shadow)
#define sh_remove_l3_shadow INTERNAL_NAME(sh_remove_l3_shadow)
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 93d96df4fd..d561677fed 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -307,7 +307,7 @@ void audit_domains(void);
int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
-
+void update_cr3(struct vcpu *v);
void propagate_page_fault(unsigned long addr, u16 error_code);
int __sync_lazy_execstate(void);
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 4b9094cb7c..46027d94ac 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -29,20 +29,8 @@
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
-/* How to make sure a page is not referred to in a shadow PT */
-/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */
-#define shadow_drop_references(_d, _p) \
- shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-#define shadow_sync_and_drop_references(_d, _p) \
- shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-
-/* How to add and remove entries in the p2m mapping. */
-#define guest_physmap_add_page(_d, _p, _m) \
- shadow_guest_physmap_add_page((_d), (_p), (_m))
-#define guest_physmap_remove_page(_d, _p, _m ) \
- shadow_guest_physmap_remove_page((_d), (_p), (_m))
-
-/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+/*****************************************************************************
+ * Macros to tell which shadow paging mode a domain is in */
#define SHM2_shift 10
/* We're in one of the shadow modes */
@@ -64,107 +52,24 @@
#define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external)
/* Xen traps & emulates all reads of all page table pages:
- * not yet supported
- */
+ * not yet supported */
#define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; })
-// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
-#ifdef __x86_64__
-#define pv_32bit_guest(_v) 0 // not yet supported
-#else
-#define pv_32bit_guest(_v) !is_hvm_vcpu(v)
-#endif
-
-/* The shadow lock.
- *
- * This lock is per-domain. It is intended to allow us to make atomic
- * updates to the software TLB that the shadow tables provide.
- *
- * Specifically, it protects:
- * - all changes to shadow page table pages
- * - the shadow hash table
- * - the shadow page allocator
- * - all changes to guest page table pages; if/when the notion of
- * out-of-sync pages is added to this code, then the shadow lock is
- * protecting all guest page table pages which are not listed as
- * currently as both guest-writable and out-of-sync...
- * XXX -- need to think about this relative to writable page tables.
- * - all changes to the page_info->tlbflush_timestamp
- * - the page_info->count fields on shadow pages
- * - the shadow dirty bit array and count
- * - XXX
- */
-#ifndef CONFIG_SMP
-#error shadow.h currently requires CONFIG_SMP
-#endif
-
-#define shadow_lock_init(_d) \
- do { \
- spin_lock_init(&(_d)->arch.shadow.lock); \
- (_d)->arch.shadow.locker = -1; \
- (_d)->arch.shadow.locker_function = "nobody"; \
- } while (0)
-
-#define shadow_locked_by_me(_d) \
- (current->processor == (_d)->arch.shadow.locker)
-
-#define shadow_lock(_d) \
- do { \
- if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
- { \
- printk("Error: shadow lock held by %s\n", \
- (_d)->arch.shadow.locker_function); \
- BUG(); \
- } \
- spin_lock(&(_d)->arch.shadow.lock); \
- ASSERT((_d)->arch.shadow.locker == -1); \
- (_d)->arch.shadow.locker = current->processor; \
- (_d)->arch.shadow.locker_function = __func__; \
- } while (0)
-
-#define shadow_unlock(_d) \
- do { \
- ASSERT((_d)->arch.shadow.locker == current->processor); \
- (_d)->arch.shadow.locker = -1; \
- (_d)->arch.shadow.locker_function = "nobody"; \
- spin_unlock(&(_d)->arch.shadow.lock); \
- } while (0)
-
-/*
- * Levels of self-test and paranoia
- * XXX should go in config files somewhere?
- */
-#define SHADOW_AUDIT_HASH 0x01 /* Check current hash bucket */
-#define SHADOW_AUDIT_HASH_FULL 0x02 /* Check every hash bucket */
-#define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */
-#define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */
-#define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */
-#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */
-
-#ifdef NDEBUG
-#define SHADOW_AUDIT 0
-#define SHADOW_AUDIT_ENABLE 0
-#else
-#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */
-#define SHADOW_AUDIT_ENABLE shadow_audit_enable
-extern int shadow_audit_enable;
-#endif
-/*
- * Levels of optimization
- * XXX should go in config files somewhere?
- */
-#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
-#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
-#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */
-#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */
-#define SHOPT_LINUX_L3_TOPLEVEL 0x10 /* Pin l3es on early 64bit linux */
-#define SHOPT_SKIP_VERIFY 0x20 /* Skip PTE v'fy when safe to do so */
+/******************************************************************************
+ * The equivalent for a particular vcpu of a shadowed domain. */
-#define SHADOW_OPTIMIZATIONS 0x3f
+/* Is this vcpu using the P2M table to translate between GFNs and MFNs?
+ *
+ * This is true of translated HVM domains on a vcpu which has paging
+ * enabled. (HVM vcpus with paging disabled are using the p2m table as
+ * its paging table, so no translation occurs in this case.)
+ * It is also true for all vcpus of translated PV domains. */
+#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled)
-/* With shadow pagetables, the different kinds of address start
+/******************************************************************************
+ * With shadow pagetables, the different kinds of address start
 * to get confusing.
*
* Virtual addresses are what they usually are: the addresses that are used
@@ -214,38 +119,16 @@ static inline _type _name##_x(_name##_t n) { return n; }
#endif
TYPE_SAFE(unsigned long,mfn)
-#define SH_PRI_mfn "05lx"
-
-static inline mfn_t
-pagetable_get_mfn(pagetable_t pt)
-{
- return _mfn(pagetable_get_pfn(pt));
-}
-
-static inline pagetable_t
-pagetable_from_mfn(mfn_t mfn)
-{
- return pagetable_from_pfn(mfn_x(mfn));
-}
-static inline int
-shadow_vcpu_mode_translate(struct vcpu *v)
-{
- // Returns true if this VCPU needs to be using the P2M table to translate
- // between GFNs and MFNs.
- //
- // This is true of translated HVM domains on a vcpu which has paging
- // enabled. (HVM vcpu's with paging disabled are using the p2m table as
- // its paging table, so no translation occurs in this case.)
- //
- // It is also true for translated PV domains.
- //
- return v->arch.shadow.translate_enabled;
-}
+/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */
+#define SH_PRI_mfn "05lx"
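The TYPE_SAFE() use above gives mfn_t, together with _mfn() and mfn_x() to convert to and from a raw unsigned long, so machine frame numbers cannot be silently mixed up with gfns or pfns. A minimal sketch of the round trip and the printk format (the value 0x1234 is arbitrary; illustrative only):

/* Illustrative only: wrapping, unwrapping and printing an mfn_t. */
unsigned long frame = 0x1234UL;                     /* a raw frame number */
mfn_t mfn = _mfn(frame);                            /* wrap it            */
printk("shadow: mfn %"SH_PRI_mfn"\n", mfn_x(mfn));  /* prints "01234"     */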
-/**************************************************************************/
-/* Mode-specific entry points into the shadow code */
+/*****************************************************************************
+ * Mode-specific entry points into the shadow code.
+ *
+ * These shouldn't be used directly by callers; rather use the functions
+ * below which will indirect through this table as appropriate. */
struct sh_emulate_ctxt;
struct shadow_paging_mode {
@@ -254,7 +137,7 @@ struct shadow_paging_mode {
int (*invlpg )(struct vcpu *v, unsigned long va);
paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va);
unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va);
- void (*update_cr3 )(struct vcpu *v);
+ void (*update_cr3 )(struct vcpu *v, int do_locking);
int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
void *new_guest_entry, u32 size);
int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
@@ -286,35 +169,30 @@ struct shadow_paging_mode {
unsigned long *gl1mfn);
void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va,
void *eff_l1e);
-#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
int (*guess_wrmap )(struct vcpu *v,
unsigned long vaddr, mfn_t gmfn);
-#endif
/* For outsiders to tell what mode we're in */
unsigned int shadow_levels;
unsigned int guest_levels;
};
-static inline int shadow_guest_paging_levels(struct vcpu *v)
-{
- ASSERT(v->arch.shadow.mode != NULL);
- return v->arch.shadow.mode->guest_levels;
-}
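The shadow_guest_paging_levels() helper removed above did nothing more than read guest_levels out of this table; a caller that still needs the information would now read the field directly, along the lines of the following sketch (the function name is invented for illustration):

/* Illustrative only: telling what paging mode a shadowed vcpu is in. */
static inline unsigned int example_guest_levels(struct vcpu *v)
{
    ASSERT(v->arch.shadow.mode != NULL);
    return v->arch.shadow.mode->guest_levels;   /* 2, 3 or 4 */
}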
-/**************************************************************************/
-/* Entry points into the shadow code */
+/*****************************************************************************
+ * Entry points into the shadow code */
-/* Enable arbitrary shadow mode. */
-int shadow_enable(struct domain *d, u32 mode);
+/* Set up the shadow-specific parts of a domain struct at start of day.
+ * Called for every domain from arch_domain_create() */
+void shadow_domain_init(struct domain *d);
-/* Turning on shadow test mode */
-int shadow_test_enable(struct domain *d);
+/* Enable an arbitrary shadow mode. Call once at domain creation. */
+int shadow_enable(struct domain *d, u32 mode);
-/* Handler for shadow control ops: enabling and disabling shadow modes,
- * and log-dirty bitmap ops all happen through here. */
+/* Handler for shadow control ops: operations from user-space to enable
+ * and disable ephemeral shadow modes (test mode and log-dirty mode) and
+ * manipulate the log-dirty bitmap. */
int shadow_domctl(struct domain *d,
- xen_domctl_shadow_op_t *sc,
- XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+ xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
/* Call when destroying a domain */
void shadow_teardown(struct domain *d);
@@ -322,164 +200,96 @@ void shadow_teardown(struct domain *d);
/* Call once all of the references to the domain have gone away */
void shadow_final_teardown(struct domain *d);
-
-/* Mark a page as dirty in the bitmap */
-void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
+/* Mark a page as dirty in the log-dirty bitmap: called when Xen
+ * makes changes to guest memory on its behalf. */
+void shadow_mark_dirty(struct domain *d, mfn_t gmfn);
+/* Cleaner version so we don't pepper shadow_mode tests all over the place */
static inline void mark_dirty(struct domain *d, unsigned long gmfn)
{
- if ( likely(!shadow_mode_log_dirty(d)) )
- return;
-
- shadow_lock(d);
- sh_do_mark_dirty(d, _mfn(gmfn));
- shadow_unlock(d);
-}
-
-/* Internal version, for when the shadow lock is already held */
-static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
-{
- ASSERT(shadow_locked_by_me(d));
if ( unlikely(shadow_mode_log_dirty(d)) )
- sh_do_mark_dirty(d, gmfn);
+ shadow_mark_dirty(d, _mfn(gmfn));
}
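mark_dirty() is the wrapper common code calls after Xen itself writes guest memory; it is cheap when log-dirty mode is off and only then enters the shadow code (which now takes the shadow lock internally). A hedged sketch of the usual pattern, assuming d, mfn, buf and len are already in hand:

/* Illustrative only: record a Xen-side write in the log-dirty bitmap. */
void *p = map_domain_page(mfn);
memcpy(p, buf, len);          /* Xen modifies the guest frame ...        */
unmap_domain_page(p);
mark_dirty(d, mfn);           /* ... and marks it dirty if logging is on */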
-static inline int
-shadow_fault(unsigned long va, struct cpu_user_regs *regs)
-/* Called from pagefault handler in Xen, and from the HVM trap handlers
+/* Handle page-faults caused by the shadow pagetable mechanisms.
+ * Called from pagefault handler in Xen, and from the HVM trap handlers
* for pagefaults. Returns 1 if this fault was an artefact of the
* shadow code (and the guest should retry) or 0 if it is not (and the
* fault should be handled elsewhere or passed to the guest). */
+static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
{
struct vcpu *v = current;
perfc_incrc(shadow_fault);
return v->arch.shadow.mode->page_fault(v, va, regs);
}
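shadow_fault() is the hook the Xen page-fault handler and the HVM trap handlers call; a return of 1 means the fault was an artefact of shadowing and the faulting guest instruction should simply be retried. A hedged sketch of a caller, with fault_addr and regs taken from the trap frame and the surrounding handler reduced to the bare decision:

/* Illustrative only: how a fault handler consumes the return value. */
if ( shadow_mode_enabled(current->domain) &&
     shadow_fault(fault_addr, regs) )
    return;    /* handled by the shadow code; the guest retries         */
/* otherwise the fault is handled elsewhere or forwarded to the guest */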
-static inline int
-shadow_invlpg(struct vcpu *v, unsigned long va)
-/* Called when the guest requests an invlpg. Returns 1 if the invlpg
- * instruction should be issued on the hardware, or 0 if it's safe not
- * to do so. */
+/* Handle invlpg requests on shadowed vcpus.
+ * Returns 1 if the invlpg instruction should be issued on the hardware,
+ * or 0 if it's safe not to do so. */
+static inline int shadow_invlpg(struct vcpu *v, unsigned long va)
{
return v->arch.shadow.mode->invlpg(v, va);
}
-static inline paddr_t
-shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
+/* Translate a guest virtual address to the physical address that the
+ * *guest* pagetables would map it to. */
+static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
{
if ( unlikely(!shadow_vcpu_mode_translate(v)) )
return (paddr_t) va;
return v->arch.shadow.mode->gva_to_gpa(v, va);
}
-static inline unsigned long
-shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
+/* Translate a guest virtual address to the frame number that the
+ * *guest* pagetables would map it to. */
+static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
{
if ( unlikely(!shadow_vcpu_mode_translate(v)) )
return va >> PAGE_SHIFT;
return v->arch.shadow.mode->gva_to_gfn(v, va);
}
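The two translations differ only in granularity: ignoring failed translations, the gpa is the gfn shifted up by PAGE_SHIFT with the page offset of the virtual address put back in. A minimal sketch of that relationship:

/* Illustrative only: gpa and gfn views of the same guest translation. */
unsigned long gfn = shadow_gva_to_gfn(v, va);
paddr_t       gpa = shadow_gva_to_gpa(v, va);
ASSERT(gpa == (((paddr_t)gfn << PAGE_SHIFT) | (va & ~PAGE_MASK)));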
-static inline void
-shadow_update_cr3(struct vcpu *v)
-/* Updates all the things that are derived from the guest's CR3.
- * Called when the guest changes CR3. */
-{
- shadow_lock(v->domain);
- v->arch.shadow.mode->update_cr3(v);
- shadow_unlock(v->domain);
-}
-
-
-/* Should be called after CR3 is updated.
- * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
- *
- * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
- * shadow_vtable, etc).
- *
- * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
- * for HVM guests, arch.monitor_table and hvm's guest CR3.
- *
- * Update ref counts to shadow tables appropriately.
- */
-static inline void update_cr3(struct vcpu *v)
+/* Update all the things that are derived from the guest's CR3.
+ * Called when the guest changes CR3; the caller can then use
+ * v->arch.cr3 as the value to load into the host CR3 to schedule this vcpu
+ * and v->arch.hvm_vcpu.hw_cr3 as the value to put in the vmcb/vmcs when
+ * entering the HVM guest. */
+static inline void shadow_update_cr3(struct vcpu *v)
{
- unsigned long cr3_mfn=0;
-
- if ( shadow_mode_enabled(v->domain) )
- {
- shadow_update_cr3(v);
- return;
- }
-
-#if CONFIG_PAGING_LEVELS == 4
- if ( !(v->arch.flags & TF_kernel_mode) )
- cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
- else
-#endif
- cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
-
- make_cr3(v, cr3_mfn);
+ v->arch.shadow.mode->update_cr3(v, 1);
}
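The contract spelt out above is: the caller installs the guest's new top-level pagetable first, calls shadow_update_cr3(), and then consumes v->arch.cr3 (or v->arch.hvm_vcpu.hw_cr3). A minimal sketch for the PV case, with refcounting and error handling omitted and new_cr3_mfn standing in for an already-validated machine frame:

/* Illustrative only (PV case): rebuild shadow state after a CR3 load. */
v->arch.guest_table = pagetable_from_pfn(new_cr3_mfn);
shadow_update_cr3(v);        /* recomputes v->arch.cr3 and the shadows */
write_ptbase(v);             /* loads the freshly computed CR3         */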
-extern void sh_update_paging_modes(struct vcpu *v);
-
-/* Should be called to initialise paging structures if the paging mode
+/* Update all the things that are derived from the guest's CR0/CR3/CR4.
+ * Called to initialize paging structures if the paging mode
* has changed, and when bringing up a VCPU for the first time. */
-static inline void shadow_update_paging_modes(struct vcpu *v)
-{
- ASSERT(shadow_mode_enabled(v->domain));
- shadow_lock(v->domain);
- sh_update_paging_modes(v);
- shadow_unlock(v->domain);
-}
+void shadow_update_paging_modes(struct vcpu *v);
-static inline void
-shadow_detach_old_tables(struct vcpu *v)
-{
- if ( v->arch.shadow.mode )
- v->arch.shadow.mode->detach_old_tables(v);
-}
-static inline mfn_t
-shadow_make_monitor_table(struct vcpu *v)
-{
- return v->arch.shadow.mode->make_monitor_table(v);
-}
-
-static inline void
-shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
-{
- v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
-}
+/*****************************************************************************
+ * Access to the guest pagetables */
+/* Get a mapping of a PV guest's l1e for this virtual address. */
static inline void *
guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
{
- if ( likely(!shadow_mode_translate(v->domain)) )
- {
- l2_pgentry_t l2e;
- ASSERT(!shadow_mode_external(v->domain));
- /* Find this l1e and its enclosing l1mfn in the linear map */
- if ( __copy_from_user(&l2e,
- &__linear_l2_table[l2_linear_offset(addr)],
- sizeof(l2_pgentry_t)) != 0 )
- return NULL;
- /* Check flags that it will be safe to read the l1e */
- if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE))
- != _PAGE_PRESENT )
- return NULL;
- *gl1mfn = l2e_get_pfn(l2e);
- return &__linear_l1_table[l1_linear_offset(addr)];
- }
+ l2_pgentry_t l2e;
- return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
+ if ( unlikely(shadow_mode_translate(v->domain)) )
+ return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
+
+ /* Find this l1e and its enclosing l1mfn in the linear map */
+ if ( __copy_from_user(&l2e,
+ &__linear_l2_table[l2_linear_offset(addr)],
+ sizeof(l2_pgentry_t)) != 0 )
+ return NULL;
+    /* Check the flags to make sure it is safe to read the l1e */
+ if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE))
+ != _PAGE_PRESENT )
+ return NULL;
+ *gl1mfn = l2e_get_pfn(l2e);
+ return &__linear_l1_table[l1_linear_offset(addr)];
}
+/* Pull down the mapping we got from guest_map_l1e() */
static inline void
guest_unmap_l1e(struct vcpu *v, void *p)
{
@@ -487,6 +297,7 @@ guest_unmap_l1e(struct vcpu *v, void *p)
unmap_domain_page(p);
}
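guest_map_l1e() and guest_unmap_l1e() are intended to bracket whatever inspects the guest's l1e; the mapping must always be dropped again. A hedged sketch of the pattern, with error handling reduced to a NULL check:

/* Illustrative only: read the guest l1e that maps vaddr. */
unsigned long gl1mfn;
l1_pgentry_t *pl1e = guest_map_l1e(v, vaddr, &gl1mfn);
if ( pl1e != NULL )
{
    l1_pgentry_t gl1e = *pl1e;     /* take a copy of the guest entry */
    guest_unmap_l1e(v, pl1e);      /* always drop the mapping again  */
    /* ... use gl1e (and gl1mfn) ... */
}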
+/* Read the guest's l1e that maps this address. */
static inline void
guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
{
@@ -503,6 +314,8 @@ guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e);
}
+/* Read the guest's l1e that maps this address, from the kernel-mode
+ * pagetables. */
static inline void
guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
{
@@ -518,82 +331,36 @@ guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
TOGGLE_MODE();
}
-
-/* Validate a pagetable change from the guest and update the shadows. */
-extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
- void *new_guest_entry);
-extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size);
-
-/* Update the shadows in response to a pagetable write from a HVM guest */
-extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
- void *entry, u32 size);
-
-/* Remove all writeable mappings of a guest frame from the shadows.
- * Returns non-zero if we need to flush TLBs.
- * level and fault_addr describe how we found this to be a pagetable;
- * level==0 means we have some other reason for revoking write access. */
-extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
- unsigned int level,
- unsigned long fault_addr);
-
-/* Remove all mappings of the guest mfn from the shadows.
- * Returns non-zero if we need to flush TLBs. */
-extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
-
-/* Remove all mappings from the shadows. */
-extern void shadow_blow_tables(struct domain *d);
-
-void
-shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
-/* This is an HVM page that we think is no longer a pagetable.
- * Unshadow it, and recursively unshadow pages that reference it. */
-
-/* Remove all shadows of the guest mfn. */
-extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
-static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
+/* Write a new value into the guest pagetable, and update the shadows
+ * appropriately. Returns 0 if we page-faulted, 1 for success. */
+int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
+ intpte_t new, mfn_t gmfn);
+
+/* Cmpxchg a new value into the guest pagetable, and update the shadows
+ * appropriately. Returns 0 if we page-faulted, 1 if not.
+ * N.B. caller should check the value of "old" to see if the
+ * cmpxchg itself was successful. */
+int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
+ intpte_t *old, intpte_t new, mfn_t gmfn);
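As the comment says, the return value of the cmpxchg wrapper only reports whether the access faulted; whether the swap itself took effect is found by comparing the value written back through *old with the value that was expected. A hedged usage sketch (expected and new_pte are placeholder names):

/* Illustrative only: checking both outcomes of the guest-entry cmpxchg. */
intpte_t old = expected;
if ( !shadow_cmpxchg_guest_entry(v, p, &old, new_pte, gmfn) )
{
    /* the access to the guest pagetable faulted: handle/inject the fault */
}
else if ( old != expected )
{
    /* the entry changed under us: the swap did not take effect, so retry */
}
else
{
    /* the guest entry was updated and its shadows revalidated */
}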
+
+/* Remove all mappings of the guest page from the shadows.
+ * This is called from common code. It does not flush TLBs. */
+int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+static inline void
+shadow_drop_references(struct domain *d, struct page_info *p)
{
- int was_locked = shadow_locked_by_me(v->domain);
- if ( !was_locked )
- shadow_lock(v->domain);
- sh_remove_shadows(v, gmfn, 0, 1);
- if ( !was_locked )
- shadow_unlock(v->domain);
+ /* See the comment about locking in sh_remove_all_mappings */
+ sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p)));
}
-/* Add a page to a domain */
-void
-shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
-
-/* Remove a page from a domain */
-void
-shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
-
-/*
- * Allocation of shadow pages
- */
-
-/* Return the minimum acceptable number of shadow pages a domain needs */
-unsigned int shadow_min_acceptable_pages(struct domain *d);
-
-/* Set the pool of shadow pages to the required number of MB.
- * Input will be rounded up to at least shadow_min_acceptable_pages().
- * Returns 0 for success, 1 for failure. */
-unsigned int shadow_set_allocation(struct domain *d,
- unsigned int megabytes,
- int *preempted);
-
-/* Return the size of the shadow pool, rounded up to the nearest MB */
-static inline unsigned int shadow_get_allocation(struct domain *d)
+/* Remove all shadows of the guest mfn. */
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
+static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
{
- unsigned int pg = d->arch.shadow.total_pages;
- return ((pg >> (20 - PAGE_SHIFT))
- + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+ /* See the comment about locking in sh_remove_shadows */
+ sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
}
-
/**************************************************************************/
/* Guest physmap (p2m) support
*
@@ -602,9 +369,20 @@ static inline unsigned int shadow_get_allocation(struct domain *d)
* guests, so we steal the address space that would have normally
* been used by the read-only MPT map.
*/
-
#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
+/* Add a page to a domain's p2m table */
+void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+ unsigned long mfn);
+
+/* Remove a page from a domain's p2m table */
+void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+ unsigned long mfn);
+
+/* Aliases, called from common code. */
+#define guest_physmap_add_page shadow_guest_physmap_add_page
+#define guest_physmap_remove_page shadow_guest_physmap_remove_page
+
/* Read the current domain's P2M table. */
static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn)
{
@@ -627,8 +405,8 @@ static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn)
return _mfn(INVALID_MFN);
}
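A typical consumer translates the gfn and checks the result before touching the frame; a minimal sketch, assuming gfn is already known and using the existing mfn_valid()/map_domain_page() helpers:

/* Illustrative only: translate a gfn through the p2m and map the frame. */
mfn_t mfn = sh_gfn_to_mfn_current(gfn);
if ( mfn_valid(mfn_x(mfn)) )
{
    void *p = map_domain_page(mfn_x(mfn));
    /* ... read or write the guest page ... */
    unmap_domain_page(p);
}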
-/* Walk another domain's P2M table, mapping pages as we go */
-extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+/* Read another domain's P2M table, mapping pages as we go */
+mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
/* General conversion function from gfn to mfn */
static inline mfn_t
@@ -666,6 +444,7 @@ mmio_space(paddr_t gpa)
return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn)));
}
+/* Translate the frame number held in an l1e from guest to machine */
static inline l1_pgentry_t
gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
{
@@ -685,4 +464,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
* indent-tabs-mode: nil
* End:
*/
-